openmpi/ompi/mca/btl/ugni/btl_ugni_component.c
Nathan Hjelm 0ccfd3e6db ugni btl update. changes:
 - re-enable sendi
 - move smsg common code into btl_ugni_smsg.h
 - added new parameters for smsg/eager frags
 - use get for frags larger than the smsg_limit
 - bug fixes
 - code cleanup

This commit was SVN r25897.
2012-02-10 00:47:29 +00:00

720 lines
24 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_ugni.h"
#include "btl_ugni_frag.h"
#include "btl_ugni_rdma.h"
#include "btl_ugni_smsg.h"
#include "opal/include/opal/align.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/memoryhooks/memory.h"
#include "ompi/runtime/params.h"
static int btl_ugni_component_register(void);
static int btl_ugni_component_open(void);
static int btl_ugni_component_close(void);
static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool);
static int mca_btl_ugni_component_progress(void);
mca_btl_ugni_component_t mca_btl_ugni_component = {
{
/* First, the mca_base_component_t struct containing meta information
about the component itself */
{
MCA_BTL_BASE_VERSION_2_0_0,
"ugni", /* MCA component name */
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
btl_ugni_component_open, /* component open */
btl_ugni_component_close, /* component close */
NULL, /* component query */
btl_ugni_component_register, /* component register */
},
{
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
mca_btl_ugni_component_init,
mca_btl_ugni_component_progress,
}
};
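/* Register an integer MCA parameter on this component and return its
 * (possibly user-overridden) value. */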
static inline int
mca_btl_ugni_param_register_int (const char *param_name, const char *help,
int value)
{
mca_base_param_reg_int(&mca_btl_ugni_component.super.btl_version,
param_name, help, false, false, value, &value);
return value;
}
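/* Register the component's MCA parameters (free list and eager fragment
 * sizing, CQ size, SMSG/FMA/get limits, RDMA retry count) and fill in the
 * default limits and flags on the module template before calling the base
 * BTL registration. */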
static int
btl_ugni_component_register(void)
{
mca_btl_ugni_component.ugni_free_list_num =
mca_btl_ugni_param_register_int("free_list_num", NULL, 8);
mca_btl_ugni_component.ugni_free_list_max =
mca_btl_ugni_param_register_int("free_list_max", NULL, -1);
mca_btl_ugni_component.ugni_free_list_inc =
mca_btl_ugni_param_register_int("free_list_inc", NULL, 64);
mca_btl_ugni_component.ugni_eager_num =
mca_btl_ugni_param_register_int("eager_num", NULL, 16);
mca_btl_ugni_component.ugni_eager_max =
mca_btl_ugni_param_register_int("eager_max", NULL, 64);
mca_btl_ugni_component.ugni_eager_inc =
mca_btl_ugni_param_register_int("eager_inc", NULL, 16);
mca_btl_ugni_component.cq_size =
mca_btl_ugni_param_register_int("cq_size", NULL, 25000);
/* SMSG limit. 0 - autoselect */
mca_btl_ugni_component.ugni_smsg_limit =
mca_btl_ugni_param_register_int("smsg_limit", "Maximum size message that "
"will be sent using the SMSG/MSGQ protocol "
"(0 - autoselect(default))", 0);
mca_btl_ugni_component.smsg_max_credits =
mca_btl_ugni_param_register_int("smsg_max_credits", "Maximum number of "
"outstanding SMSG/MSGQ message (default 32)",
32);
mca_btl_ugni_component.ugni_fma_limit =
mca_btl_ugni_param_register_int("fma_limit", "Maximum size message that "
"will be sent using the FMA (Fast Memory "
"Access) protocol (default 4095)",
4 * 1024 - 1);
mca_btl_ugni_component.ugni_get_limit =
mca_btl_ugni_param_register_int("get_limit", NULL, 8 * 1024);
mca_btl_ugni_component.rdma_max_retries =
mca_btl_ugni_param_register_int("rdma_max_retries", NULL, 8);
mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
/* smsg threshold */
mca_btl_ugni_module.super.btl_eager_limit = 8 * 1024;
mca_btl_ugni_module.super.btl_rndv_eager_limit = 8 * 1024;
mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size = 2 * 1024 * 1024;
mca_btl_ugni_module.super.btl_max_send_size = 8 * 1024;
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024;
/* threshold for put */
mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024;
mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_RDMA_MATCHED |
MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbps */
mca_btl_ugni_module.super.btl_latency = 2; /* microseconds */
/* Call the BTL base to register its MCA params */
mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
&mca_btl_ugni_module.super);
return OMPI_SUCCESS;
}
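/* Initialize component state and construct the SMSG and RDMA fragment
 * free lists. The modules themselves are created later in component_init. */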
static int
btl_ugni_component_open(void)
{
mca_btl_ugni_component.ugni_num_btls = 0;
mca_btl_ugni_component.modules = NULL;
OBJ_CONSTRUCT(&mca_btl_ugni_component.ugni_frags_smsg, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_ugni_component.ugni_frags_rdma, ompi_free_list_t);
return OMPI_SUCCESS;
}
/*
 * component cleanup - shut down the common uGNI layer and destruct the
 * fragment free lists
 */
static int
btl_ugni_component_close(void)
{
ompi_common_ugni_fini ();
OBJ_DESTRUCT(&mca_btl_ugni_component.ugni_frags_smsg);
OBJ_DESTRUCT(&mca_btl_ugni_component.ugni_frags_rdma);
return OMPI_SUCCESS;
}
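/* Enable mpi_leave_pinned when full memory hook (free/munmap) support is
 * available and the user left the related parameters at their defaults. */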
static void mca_btl_ugni_autoset_leave_pinned (void) {
mca_base_param_source_t source;
int index, rc, value;
/* If we have a memory manager available, mpi_leave_pinned==-1, and the
user did not explicitly set mpi_leave_pinned_pipeline, then set
mpi_leave_pinned to 1. We have a memory manager if we have both FREE
and MUNMAP support */
value = opal_mem_hooks_support_level();
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
rc = 0;
index = mca_base_param_find("mpi", NULL, "leave_pinned");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
-1 == value) {
++rc;
}
}
index = mca_base_param_find("mpi", NULL, "leave_pinned_pipeline");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
NULL)) {
if (0 == value && MCA_BASE_PARAM_SOURCE_DEFAULT == source) {
++rc;
}
}
}
/* If we were good on both parameters, then set leave_pinned=1 */
if (2 == rc) {
ompi_mpi_leave_pinned = 1;
ompi_mpi_leave_pinned_pipeline = 0;
}
}
}
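/* Ask uGNI how much mailbox space is needed for the configured SMSG
 * message size and credit count, then round the result up to a 64-byte
 * boundary. */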
static int mca_btl_ugni_smsg_setup (void) {
gni_smsg_attr_t tmp_smsg_attrib;
unsigned int mbox_size;
int rc;
/* calculate mailbox size */
tmp_smsg_attrib.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
tmp_smsg_attrib.msg_maxsize = mca_btl_ugni_component.ugni_smsg_limit;
tmp_smsg_attrib.mbox_maxcredit = mca_btl_ugni_component.smsg_max_credits;
rc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
return ompi_common_rc_ugni_to_ompi (rc);
}
mca_btl_ugni_component.smsg_mbox_size = OPAL_ALIGN(mbox_size, 64, unsigned int);
return OMPI_SUCCESS;
}
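/* Component init: bring up the common uGNI layer, allocate one BTL module
 * per device, auto-select the SMSG limit from the job size, size the SMSG
 * mailboxes, and initialize each module. */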
static mca_btl_base_module_t **
mca_btl_ugni_component_init (int *num_btl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
{
struct mca_btl_base_module_t **base_modules;
mca_btl_ugni_module_t *ugni_modules;
unsigned int i;
size_t nprocs;
int rc;
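/* reset SIGSEGV to the default handler (undoes any handler installed by
   earlier initialization; this appears to be a debugging leftover) */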
signal (SIGSEGV, SIG_DFL);
/* Initialize ugni library and create communication domain */
rc = ompi_common_ugni_init();
if (OMPI_SUCCESS != rc) {
return NULL;
}
/* Create and initialize modules
* Create one module per device
* One btl == One module
*/
/* Manju: I should set this automatically, not hardcoded */
mca_btl_ugni_component.ugni_num_btls = ompi_common_ugni_module.device_count;
BTL_VERBOSE(("btl/ugni initializing"));
ugni_modules = mca_btl_ugni_component.modules = (mca_btl_ugni_module_t *)
calloc (mca_btl_ugni_component.ugni_num_btls,
sizeof (mca_btl_ugni_module_t));
if (OPAL_UNLIKELY(NULL == mca_btl_ugni_component.modules)) {
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
return NULL;
}
base_modules = (struct mca_btl_base_module_t **)
calloc (mca_btl_ugni_component.ugni_num_btls,
sizeof (struct mca_btl_base_module_t *));
if (OPAL_UNLIKELY(NULL == base_modules)) {
BTL_ERROR(("Malloc failed : %s:%d", __FILE__, __LINE__));
return NULL;
}
mca_btl_ugni_autoset_leave_pinned ();
(void) ompi_proc_world (&nprocs);
if (0 == mca_btl_ugni_component.ugni_smsg_limit) {
/* auto-set the smsg limit based on the number of ranks */
if (nprocs <= 1024) {
mca_btl_ugni_component.ugni_smsg_limit = 1024;
} else if (nprocs <= 16384) {
mca_btl_ugni_component.ugni_smsg_limit = 512;
} else {
mca_btl_ugni_component.ugni_smsg_limit = 256;
}
}
mca_btl_ugni_component.smsg_max_data = mca_btl_ugni_component.ugni_smsg_limit -
sizeof (mca_btl_ugni_send_frag_hdr_t);
/* module settings */
mca_btl_ugni_module.super.btl_max_send_size = mca_btl_ugni_module.super.btl_eager_limit;
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
rc = mca_btl_ugni_smsg_setup ();
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
return NULL;
}
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
mca_btl_ugni_module_t *ugni_module = ugni_modules + i;
rc = mca_btl_ugni_module_init (ugni_module,
ompi_common_ugni_module.devices + i);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
__LINE__));
return NULL;
}
base_modules[i] = (mca_btl_base_module_t *) ugni_module;
}
*num_btl_modules = mca_btl_ugni_component.ugni_num_btls;
BTL_VERBOSE(("btl/ugni done initializing modules"));
return base_modules;
}
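/* Local completion callback for a reverse get (remote put). Notify the
 * peer over SMSG that the put finished; if the notification cannot be
 * sent, queue the fragment on failed_frags for a later retry. */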
static void mca_btl_ugni_callback_reverse_get (ompi_common_ugni_post_desc_t *desc, int rc)
{
mca_btl_ugni_base_frag_t *frag = MCA_BTL_UGNI_DESC_TO_FRAG(desc);
BTL_VERBOSE(("reverse get (put) for rem_ctx %p complete", frag->hdr.rdma.ctx));
/* tell peer the put is complete */
rc = ompi_mca_btl_ugni_smsg_send (frag, false, &frag->hdr.rdma, sizeof (frag->hdr.rdma),
NULL, 0, MCA_BTL_UGNI_TAG_PUT_COMPLETE);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
opal_list_append (&frag->endpoint->btl->failed_frags, (opal_list_item_t *) frag);
}
}
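/* Completion callback for an eager get. Deliver the received data to the
 * matching active-message callback, then tell the peer the get is done
 * (queueing the fragment for retry if the SMSG send fails). */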
static void mca_btl_ugni_callback_eager_get (ompi_common_ugni_post_desc_t *desc, int rc)
{
mca_btl_ugni_base_frag_t *frag = MCA_BTL_UGNI_DESC_TO_FRAG(desc);
mca_btl_active_message_callback_t *reg;
BTL_VERBOSE(("eager get for rem_ctx %p complete", frag->hdr.eager.ctx));
/* the frag is already set up for the send callback */
frag->segments[0].seg_len = frag->hdr.eager.len;
reg = mca_btl_base_active_message_trigger + frag->hdr.eager.tag;
reg->cbfunc(&frag->endpoint->btl->super, frag->hdr.eager.tag, &(frag->base), reg->cbdata);
/* tell peer the get is complete */
rc = ompi_mca_btl_ugni_smsg_send (frag, false, &frag->hdr.eager, sizeof (frag->hdr.eager),
NULL, 0, MCA_BTL_UGNI_TAG_GET_COMPLETE);
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
opal_list_append (&frag->endpoint->btl->failed_frags, (opal_list_item_t *) frag);
}
}
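/* Handle a PUT_INIT request: perform the transfer on the peer's behalf
 * with an RDMA put from the header's source segment to its destination
 * segment (a "reverse get"); the completion callback notifies the peer. */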
static inline int mca_btl_ugni_start_reverse_get (mca_btl_base_endpoint_t *ep,
mca_btl_ugni_rdma_frag_hdr_t hdr)
{
mca_btl_ugni_base_frag_t *frag;
int rc;
BTL_VERBOSE(("starting reverse get (put) for remote ctx: %p", hdr.ctx));
rc = MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ep, frag);
if (OPAL_UNLIKELY(NULL == frag)) {
BTL_ERROR(("error allocating rdma frag for reverse get"));
return rc;
}
frag->hdr.rdma = hdr;
frag->base.des_cbfunc = NULL;
frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
frag->segments[0] = hdr.src_seg;
frag->base.des_src = frag->segments;
frag->base.des_src_cnt = 1;
frag->segments[1] = hdr.dst_seg;
frag->base.des_dst = frag->segments + 1;
frag->base.des_dst_cnt = 1;
rc = mca_btl_ugni_put (&ep->btl->super, ep, &frag->base);
assert (OMPI_SUCCESS == rc);
frag->post_desc.cbfunc = mca_btl_ugni_callback_reverse_get;
return rc;
}
static inline int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
mca_btl_ugni_eager_frag_hdr_t hdr);
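/* Retry callback: re-issue an eager get that previously failed because no
 * eager receive fragment was available, then return the placeholder
 * fragment. */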
static void mca_btl_ugni_callback_eager_get_retry (ompi_common_ugni_post_desc_t *desc, int rc)
{
mca_btl_ugni_base_frag_t *frag = MCA_BTL_UGNI_DESC_TO_FRAG(desc);
(void) mca_btl_ugni_start_eager_get(frag->endpoint, frag->hdr.eager);
mca_btl_ugni_frag_return (frag);
}
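/* Handle a GET_INIT request: the peer's fragment was too large for SMSG,
 * so pull the payload into an eager receive fragment with an FMA or BTE
 * get. If no eager fragment is available the request is queued (via an
 * RDMA fragment and the retry callback) and retried later. */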
static inline int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
mca_btl_ugni_eager_frag_hdr_t hdr)
{
mca_btl_ugni_base_frag_t *frag;
int rc;
BTL_VERBOSE(("starting eager get for remote ctx: %p", hdr.ctx));
rc = MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(ep, frag);
if (OPAL_UNLIKELY(NULL == frag)) {
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ep, frag);
assert (NULL != frag);
frag->hdr.eager = hdr;
frag->post_desc.cbfunc = mca_btl_ugni_callback_eager_get_retry;
opal_list_append (&ep->btl->failed_frags, (opal_list_item_t *) frag);
return rc;
}
frag->hdr.eager = hdr;
frag->base.des_cbfunc = NULL;
frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
frag->base.des_dst = frag->segments;
frag->base.des_dst_cnt = 1;
frag->segments[1] = hdr.src_seg;
frag->base.des_src = frag->segments + 1;
frag->base.des_src_cnt = 1;
/* increase size to a multiple of 4 bytes (required for get) */
frag->segments[0].seg_len = (hdr.len + 3) & ~3;
frag->segments[1].seg_len = (hdr.len + 3) & ~3;
if (frag->segments[0].seg_len <= mca_btl_ugni_component.ugni_fma_limit) {
rc = mca_btl_ugni_post_fma (frag, GNI_POST_FMA_GET, frag->base.des_dst, frag->base.des_src);
} else {
rc = mca_btl_ugni_post_bte (frag, GNI_POST_RDMA_GET, frag->base.des_dst, frag->base.des_src);
}
assert (OMPI_SUCCESS == rc);
frag->post_desc.cbfunc = mca_btl_ugni_callback_eager_get;
return rc;
}
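/* Drain the endpoint's SMSG mailbox: dispatch each message by tag (active
 * message sends, put/get initiation and completion, disconnect) and
 * release it. Returns the number of messages processed. */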
static inline int
mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
{
mca_btl_active_message_callback_t *reg;
mca_btl_ugni_base_frag_t frag;
bool disconnect = false;
uintptr_t data_ptr;
gni_return_t rc;
int count = 0;
/* per uGNI documentation we loop until the mailbox is empty */
do {
uint8_t tag = GNI_SMSG_ANY_TAG;
rc = GNI_SmsgGetNextWTag (ep->common->ep_handle, (void **) &data_ptr, &tag);
if (GNI_RC_NOT_DONE == rc) {
BTL_VERBOSE(("no smsg message waiting. rc = %d", rc));
return count;
}
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
fprintf (stderr, "Unhandled Smsg error: %d\n", rc);
assert (0);
return OMPI_ERROR;
}
if (OPAL_UNLIKELY(0 == data_ptr)) {
BTL_ERROR(("null data ptr!"));
assert (0);
return OMPI_ERROR;
}
count++;
BTL_VERBOSE(("got smsg fragment. tag = %d\n", tag));
switch (tag) {
case MCA_BTL_UGNI_TAG_SEND:
frag.hdr.send = ((mca_btl_ugni_send_frag_hdr_t *) data_ptr)[0];
BTL_VERBOSE(("received smsg fragment. hdr = {len = %u, tag = %d}",
(unsigned int) frag.hdr.send.len, frag.hdr.send.tag));
reg = mca_btl_base_active_message_trigger + frag.hdr.send.tag;
frag.base.des_dst = frag.segments;
frag.base.des_dst_cnt = 1;
frag.segments[0].seg_addr.pval = (void *)((uintptr_t)data_ptr + sizeof (mca_btl_ugni_send_frag_hdr_t));
frag.segments[0].seg_len = frag.hdr.send.len;
reg->cbfunc(&ep->btl->super, frag.hdr.send.tag, &(frag.base), reg->cbdata);
break;
case MCA_BTL_UGNI_TAG_PUT_INIT:
frag.hdr.rdma = ((mca_btl_ugni_rdma_frag_hdr_t *) data_ptr)[0];
mca_btl_ugni_start_reverse_get (ep, frag.hdr.rdma);
break;
case MCA_BTL_UGNI_TAG_PUT_COMPLETE:
frag.hdr.rdma = ((mca_btl_ugni_rdma_frag_hdr_t *) data_ptr)[0];
mca_btl_ugni_post_frag_complete (frag.hdr.rdma.ctx, OMPI_SUCCESS);
break;
case MCA_BTL_UGNI_TAG_GET_INIT:
frag.hdr.eager = ((mca_btl_ugni_eager_frag_hdr_t *) data_ptr)[0];
mca_btl_ugni_start_eager_get (ep, frag.hdr.eager);
break;
case MCA_BTL_UGNI_TAG_GET_COMPLETE:
frag.hdr.eager = ((mca_btl_ugni_eager_frag_hdr_t *) data_ptr)[0];
mca_btl_ugni_post_frag_complete (frag.hdr.eager.ctx, OMPI_SUCCESS);
break;
case MCA_BTL_UGNI_TAG_DISCONNECT:
/* remote endpoint has disconnected */
disconnect = true;
break;
default:
BTL_ERROR(("unknown tag %d\n", tag));
break;
}
rc = GNI_SmsgRelease (ep->common->ep_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
BTL_ERROR(("Smsg release failed!"));
return OMPI_ERROR;
}
} while (!disconnect);
/* disconnect if we get here */
mca_btl_ugni_ep_disconnect (ep, false);
return count;
}
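/* Progress connection datagrams: if a datagram completed, advance the
 * connection handshake for the matching endpoint, process any SMSG
 * traffic already waiting in its mailbox, and repost the wildcard
 * datagram if that is the one that completed. */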
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *btl)
{
uint32_t remote_addr, remote_id;
mca_btl_base_endpoint_t *ep;
gni_post_state_t post_state;
gni_ep_handle_t handle;
uint64_t datagram_id;
gni_return_t grc;
int count = 0;
/* check for datagram completion */
grc = GNI_PostDataProbeById (btl->device->dev_handle, &datagram_id);
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
return 0;
}
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
handle = btl->wildcard_ep;
} else {
handle =
btl->endpoints[(uint32_t)(datagram_id & 0xffffffffull)]->common->ep_handle;
}
/* wait for the incoming datagram to complete (in case it has not already) */
grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
&remote_addr, &remote_id);
if (GNI_RC_SUCCESS != grc) {
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
return ompi_common_rc_ugni_to_ompi (grc);
}
BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
"peer = %d", datagram_id, post_state, remote_id));
ep = btl->endpoints[remote_id];
OPAL_THREAD_LOCK(&ep->common->lock);
/* NTH: TODO -- error handling */
(void) mca_btl_ugni_ep_connect_progress (ep);
if (OMPI_COMMON_UGNI_CONNECTED == MCA_BTL_UGNI_EP_STATE(ep)) {
/* process messages waiting in the endpoint's smsg mailbox */
count = mca_btl_ugni_smsg_process (ep);
}
OPAL_THREAD_UNLOCK(&ep->common->lock);
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
mca_btl_ugni_wildcard_ep_post (btl);
}
return count;
}
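/* Recover from an SMSG remote CQ overrun: the lost events mean we no
 * longer know which endpoints have pending messages, so drain the CQ and
 * poll every connected endpoint's mailbox. */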
static inline int
mca_btl_ugni_handle_smsg_overrun (mca_btl_ugni_module_t *btl)
{
gni_cq_entry_t event_data;
unsigned int ep_index;
int count, rc;
BTL_VERBOSE(("btl/ugni_component detect SMSG CQ overrun. "
"processing message backlog..."));
/* we don't know which endpoint lost an smsg completion. clear the
smsg remote cq and check all mailboxes */
/* clear out remote cq */
do {
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
} while (GNI_RC_SUCCESS == rc);
count = 0;
for (ep_index = 0 ; ep_index < btl->endpoint_count ; ++ep_index) {
mca_btl_base_endpoint_t *ep = btl->endpoints[ep_index];
if (NULL == ep || OMPI_COMMON_UGNI_CONNECTED != MCA_BTL_UGNI_EP_STATE(ep)) {
continue;
}
do {
/* clear out smsg mailbox */
rc = mca_btl_ugni_smsg_process (ep);
if (rc > 0)
count += rc;
} while (rc > 0);
}
return count;
}
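/* Poll the remote SMSG CQ. On overrun fall back to the recovery path;
 * otherwise process the mailbox of the endpoint identified by the event's
 * message id (ignoring events for endpoints that are not yet connected). */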
static inline int
mca_btl_ugni_progress_smsg (mca_btl_ugni_module_t *btl)
{
mca_btl_base_endpoint_t *ep;
gni_cq_entry_t event_data;
int rc;
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
if (GNI_RC_NOT_DONE == rc) {
return 0;
}
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc || !GNI_CQ_STATUS_OK(event_data) ||
GNI_CQ_OVERRUN(event_data))) {
if (GNI_RC_ERROR_RESOURCE == rc ||
(GNI_RC_SUCCESS == rc && GNI_CQ_OVERRUN(event_data))) {
/* recover from smsg cq overrun */
return mca_btl_ugni_handle_smsg_overrun (btl);
}
BTL_ERROR(("unhandled error in GNI_CqGetEvent"));
/* unhandled error: crash */
assert (0);
return OMPI_ERROR;
}
BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64
". ok = %d, type = %" PRIu64 "\n", (uint64_t) event_data,
GNI_CQ_GET_MSG_ID(event_data), GNI_CQ_STATUS_OK(event_data),
GNI_CQ_GET_TYPE(event_data)));
/* we could check the message type here but it seems to always be a POST */
ep = btl->endpoints[GNI_CQ_GET_MSG_ID(event_data)];
if (OPAL_UNLIKELY(OMPI_COMMON_UGNI_CONNECTED != MCA_BTL_UGNI_EP_STATE(ep))) {
/* due to the nature of datagrams we may get a smsg completion before
we get mailbox info from the peer */
BTL_VERBOSE(("event occurred on an unconnected endpoint! ep state = %d", MCA_BTL_UGNI_EP_STATE(ep)));
return 0;
}
return mca_btl_ugni_smsg_process (ep);
}
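/* Progress RDMA (BTE) completions on this module's local CQ. */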
static inline int
mca_btl_ugni_progress_bte (mca_btl_ugni_module_t *btl)
{
return ompi_common_ugni_process_completed_post (btl->device, btl->bte_local_cq);
}
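/* Retry fragments queued on failed_frags by re-invoking their completion
 * callbacks, which re-attempt the SMSG notification or eager get. */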
static int
mca_btl_ugni_retry_failed (mca_btl_ugni_module_t *btl)
{
int count = opal_list_get_size (&btl->failed_frags);
opal_list_item_t *item;
while (count-- && NULL != (item = opal_list_remove_first (&btl->failed_frags))) {
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) item;
frag->post_desc.cbfunc (&frag->post_desc, OMPI_SUCCESS);
}
return 0;
}
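/* Component progress: advance the common uGNI layer, then for each module
 * retry failed fragments and progress datagram, SMSG, and BTE completions. */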
static int
mca_btl_ugni_component_progress (void)
{
mca_btl_ugni_module_t *btl;
unsigned int i;
int count;
count = ompi_common_ugni_progress ();
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
btl = mca_btl_ugni_component.modules + i;
mca_btl_ugni_retry_failed (btl);
count += mca_btl_ugni_progress_datagram (btl);
count += mca_btl_ugni_progress_smsg (btl);
count += mca_btl_ugni_progress_bte (btl);
}
return count;
}