71527c8058
This commit was SVN r25618.
617 строки
20 KiB
C
617 строки
20 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "btl_ugni.h"
|
|
#include "btl_ugni_endpoint.h"
|
|
#include "btl_ugni_frag.h"
|
|
#include "btl_ugni_rdma.h"
|
|
|
|
#include "opal/mca/base/mca_base_param.h"
|
|
#include "opal/memoryhooks/memory.h"
|
|
#include "ompi/runtime/params.h"
|
|
|
|
int mca_btl_ugni_smsg_max_credits = 32;
|
|
int mca_btl_ugni_smsg_mbox_size;
|
|
|
|
static int btl_ugni_component_register(void);
|
|
static int btl_ugni_component_open(void);
|
|
static int btl_ugni_component_close(void);
|
|
static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool);
|
|
static int mca_btl_ugni_component_progress(void);
|
|
|
|
mca_btl_ugni_component_t mca_btl_ugni_component = {
|
|
{
|
|
/* First, the mca_base_component_t struct containing meta information
|
|
about the component itself */
|
|
|
|
{
|
|
MCA_BTL_BASE_VERSION_2_0_0,
|
|
|
|
"ugni", /* MCA component name */
|
|
OMPI_MAJOR_VERSION, /* MCA component major version */
|
|
OMPI_MINOR_VERSION, /* MCA component minor version */
|
|
OMPI_RELEASE_VERSION, /* MCA component release version */
|
|
btl_ugni_component_open, /* component open */
|
|
btl_ugni_component_close, /* component close */
|
|
NULL, /* component query */
|
|
btl_ugni_component_register, /* component register */
|
|
},
|
|
{
|
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
|
},
|
|
mca_btl_ugni_component_init,
|
|
mca_btl_ugni_component_progress,
|
|
}
|
|
};
|
|
|
|
static inline char *
|
|
mca_btl_ugni_param_register_string(const char *param_name,
|
|
const char *default_value)
|
|
{
|
|
char *param_value;
|
|
int id = mca_base_param_register_string("btl", "ugni", param_name, NULL,
|
|
default_value);
|
|
mca_base_param_lookup_string(id, ¶m_value);
|
|
return param_value;
|
|
}
|
|
|
|
static inline int
|
|
mca_btl_ugni_param_register_int (const char *param_name, int value)
|
|
{
|
|
int id = mca_base_param_register_int("btl", "ugni", param_name, NULL, value);
|
|
mca_base_param_lookup_int(id, &value);
|
|
return value;
|
|
}
|
|
|
|
static int
|
|
btl_ugni_component_register(void)
|
|
{
|
|
mca_btl_ugni_component.ugni_free_list_num =
|
|
mca_btl_ugni_param_register_int("free_list_num", 8);
|
|
mca_btl_ugni_component.ugni_free_list_max =
|
|
mca_btl_ugni_param_register_int("free_list_max", -1);
|
|
mca_btl_ugni_component.ugni_free_list_inc =
|
|
mca_btl_ugni_param_register_int("free_list_inc", 64);
|
|
|
|
mca_btl_ugni_component.cq_size =
|
|
mca_btl_ugni_param_register_int("cq_size", 25000);
|
|
|
|
mca_btl_ugni_component.btl_fma_limit =
|
|
mca_btl_ugni_param_register_int("fma_limit", 4 * 1024);
|
|
|
|
mca_btl_ugni_component.btl_get_limit =
|
|
mca_btl_ugni_param_register_int("get_limit", 8 * 1024);
|
|
|
|
mca_btl_ugni_component.rdma_max_retries =
|
|
mca_btl_ugni_param_register_int("rdma_max_retries", 8);
|
|
|
|
mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
|
|
|
|
/* smsg threshold */
|
|
mca_btl_ugni_module.super.btl_eager_limit = 0; /* set dynamically in module_init */
|
|
mca_btl_ugni_module.super.btl_rndv_eager_limit = 8 * 1024;
|
|
mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size = 2 * 1024 * 1024;
|
|
mca_btl_ugni_module.super.btl_max_send_size = 0; /* set this later */
|
|
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 0; /* set this later */
|
|
|
|
/* threshold for put */
|
|
mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 0;
|
|
|
|
mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
|
|
MCA_BTL_FLAGS_RDMA |
|
|
MCA_BTL_FLAGS_RDMA_MATCHED;
|
|
|
|
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
|
|
mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */
|
|
|
|
/* Call the BTL based to register its MCA params */
|
|
mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
|
|
&mca_btl_ugni_module.super);
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
static int
|
|
btl_ugni_component_open(void)
|
|
{
|
|
mca_btl_ugni_component.ugni_num_btls = 0;
|
|
mca_btl_ugni_component.modules = NULL;
|
|
|
|
OBJ_CONSTRUCT(&mca_btl_ugni_component.ugni_frags_eager, ompi_free_list_t);
|
|
OBJ_CONSTRUCT(&mca_btl_ugni_component.ugni_frags_rdma, ompi_free_list_t);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
* component cleanup - sanity checking of queue lengths
|
|
*/
|
|
static int
|
|
btl_ugni_component_close(void)
|
|
{
|
|
ompi_common_ugni_fini ();
|
|
|
|
OBJ_DESTRUCT(&mca_btl_ugni_component.ugni_frags_eager);
|
|
OBJ_DESTRUCT(&mca_btl_ugni_component.ugni_frags_rdma);
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
static void mca_btl_ugni_autoset_leave_pinned (void) {
|
|
mca_base_param_source_t source;
|
|
int index, rc, value;
|
|
|
|
/* If we have a memory manager available, and
|
|
mpi_leave_pinned==-1, then unless the user explicitly set
|
|
mpi_leave_pinned_pipeline==0, then set mpi_leave_pinned to 1.
|
|
We have a memory manager if we have both FREE and MUNMAP
|
|
support */
|
|
value = opal_mem_hooks_support_level();
|
|
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
|
|
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
|
|
rc = 0;
|
|
index = mca_base_param_find("mpi", NULL, "leave_pinned");
|
|
if (index >= 0) {
|
|
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
|
|
-1 == value) {
|
|
++rc;
|
|
}
|
|
}
|
|
index = mca_base_param_find("mpi", NULL, "leave_pinned_pipeline");
|
|
if (index >= 0) {
|
|
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
|
|
OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
|
|
NULL)) {
|
|
if (0 == value && MCA_BASE_PARAM_SOURCE_DEFAULT == source) {
|
|
++rc;
|
|
}
|
|
}
|
|
}
|
|
/* If we were good on both parameters, then set leave_pinned=1 */
|
|
if (2 == rc) {
|
|
ompi_mpi_leave_pinned = 1;
|
|
ompi_mpi_leave_pinned_pipeline = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int mca_btl_ugni_smsg_setup (void) {
|
|
gni_smsg_attr_t tmp_smsg_attrib;
|
|
unsigned int mbox_size;
|
|
int rc;
|
|
|
|
/* calculate mailbox size */
|
|
tmp_smsg_attrib.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
|
|
tmp_smsg_attrib.msg_maxsize = mca_btl_ugni_component.eager_limit + sizeof (mca_btl_ugni_frag_hdr_t);
|
|
tmp_smsg_attrib.mbox_maxcredit = mca_btl_ugni_smsg_max_credits;
|
|
|
|
rc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
|
|
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
|
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
|
|
return ompi_common_rc_ugni_to_ompi (rc);
|
|
}
|
|
|
|
mca_btl_ugni_smsg_mbox_size = ((mbox_size + opal_cache_line_size - 1)/opal_cache_line_size) * opal_cache_line_size;
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
static mca_btl_base_module_t **
|
|
mca_btl_ugni_component_init (int *num_btl_modules,
|
|
bool enable_progress_threads,
|
|
bool enable_mpi_threads)
|
|
{
|
|
struct mca_btl_base_module_t **base_modules;
|
|
mca_btl_ugni_module_t *ugni_modules;
|
|
unsigned int i;
|
|
size_t nprocs;
|
|
int rc;
|
|
|
|
/* Initialize ugni library and create communication domain */
|
|
rc = ompi_common_ugni_init();
|
|
if (OMPI_SUCCESS != rc) {
|
|
return NULL;
|
|
}
|
|
|
|
/* Create and initialize modules
|
|
* Create one module per device
|
|
* One btl == One module
|
|
*/
|
|
/* Manju: I should set this automatically, not hardcoded */
|
|
mca_btl_ugni_component.ugni_num_btls = ompi_common_ugni_module.device_count;
|
|
|
|
BTL_VERBOSE(("btl/ugni initializing"));
|
|
|
|
ugni_modules = mca_btl_ugni_component.modules = (mca_btl_ugni_module_t *)
|
|
calloc (mca_btl_ugni_component.ugni_num_btls,
|
|
sizeof (mca_btl_ugni_module_t));
|
|
|
|
if (OPAL_UNLIKELY(NULL == mca_btl_ugni_component.modules)) {
|
|
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
|
|
return NULL;
|
|
}
|
|
|
|
base_modules = (struct mca_btl_base_module_t **)
|
|
calloc (mca_btl_ugni_component.ugni_num_btls,
|
|
sizeof (struct mca_btl_base_module_t *));
|
|
if (OPAL_UNLIKELY(NULL == base_modules)) {
|
|
BTL_ERROR(("Malloc failed : %s:%d", __FILE__, __LINE__));
|
|
return NULL;
|
|
}
|
|
|
|
mca_btl_ugni_autoset_leave_pinned ();
|
|
|
|
(void) ompi_proc_world (&nprocs);
|
|
|
|
if (0 == mca_btl_ugni_component.eager_limit) {
|
|
/* auto-set the eager limit based on the number of ranks */
|
|
if (nprocs <= 1024) {
|
|
mca_btl_ugni_component.eager_limit = 1024;
|
|
} else if (nprocs <= 16384) {
|
|
mca_btl_ugni_component.eager_limit = 512;
|
|
} else {
|
|
mca_btl_ugni_component.eager_limit = 256;
|
|
}
|
|
}
|
|
|
|
rc = mca_btl_ugni_smsg_setup ();
|
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
|
return NULL;
|
|
}
|
|
|
|
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
|
mca_btl_ugni_module_t *ugni_module = ugni_modules + i;
|
|
|
|
rc = mca_btl_ugni_module_init (ugni_module,
|
|
ompi_common_ugni_module.devices + i);
|
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
|
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
|
|
__LINE__));
|
|
return NULL;
|
|
}
|
|
|
|
base_modules[i] = (mca_btl_base_module_t *) ugni_module;
|
|
}
|
|
|
|
*num_btl_modules = mca_btl_ugni_component.ugni_num_btls;
|
|
|
|
BTL_VERBOSE(("btl/ugni done initializing modules"));
|
|
|
|
return base_modules;
|
|
}
|
|
|
|
static inline void mca_btl_ugni_callback_reverse_get (mca_btl_base_module_t *btl,
|
|
mca_btl_base_endpoint_t *ep,
|
|
mca_btl_base_descriptor_t *des,
|
|
int rc)
|
|
{
|
|
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
|
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
|
|
uint32_t msg_id = ORTE_PROC_MY_NAME->vpid;
|
|
|
|
BTL_VERBOSE(("reverse get (put) for rem_ctx %p complete", des->des_cbdata));
|
|
|
|
/* tell peer the put is complete */
|
|
rc = GNI_SmsgSendWTag (frag->endpoint->common->ep_handle, &des->des_cbdata, sizeof (void *),
|
|
NULL, 0, msg_id, MCA_BTL_UGNI_TAG_PUT_COMPLETE);
|
|
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
|
/* turn off btl ownership for now */
|
|
des->des_flags &= ~MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
|
opal_list_append (&ugni_module->failed_frags, (opal_list_item_t *) des);
|
|
} else {
|
|
des->des_flags |= MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
|
}
|
|
}
|
|
|
|
static inline int mca_btl_ugni_start_progress_reverse_get (mca_btl_base_endpoint_t *ep,
|
|
mca_btl_base_segment_t *segments,
|
|
void *rem_ctx)
|
|
{
|
|
mca_btl_ugni_base_frag_t *frag;
|
|
int rc;
|
|
|
|
BTL_VERBOSE(("starting reverse get (put) for remote ctx: %p", rem_ctx));
|
|
|
|
MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ep->btl, frag, rc);
|
|
if (OPAL_UNLIKELY(NULL == frag)) {
|
|
BTL_ERROR(("error allocating rdma frag for reverse get"));
|
|
return rc;
|
|
}
|
|
|
|
frag->base.des_cbfunc = mca_btl_ugni_callback_reverse_get;
|
|
frag->base.des_cbdata = rem_ctx;
|
|
frag->endpoint = ep;
|
|
|
|
memmove (&frag->segments, segments, 2 * sizeof (segments[0]));
|
|
|
|
frag->base.des_src = frag->segments;
|
|
frag->base.des_src_cnt = 1;
|
|
frag->base.des_dst = frag->segments + 1;
|
|
frag->base.des_dst_cnt = 1;
|
|
|
|
rc = mca_btl_ugni_put (&ep->btl->super, ep, &frag->base);
|
|
assert (OMPI_SUCCESS == rc);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static inline int
|
|
mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
|
{
|
|
mca_btl_active_message_callback_t *reg;
|
|
mca_btl_ugni_base_frag_t frag;
|
|
mca_btl_base_segment_t *segments;
|
|
mca_btl_ugni_frag_hdr_t *hdr;
|
|
uintptr_t data_ptr;
|
|
int tries = 3;
|
|
int count = 0;
|
|
int rc;
|
|
|
|
do {
|
|
uint8_t tag = GNI_SMSG_ANY_TAG;
|
|
|
|
rc = GNI_SmsgGetNextWTag (ep->common->ep_handle, (void **) &data_ptr, &tag);
|
|
if (GNI_RC_SUCCESS != rc) {
|
|
BTL_VERBOSE(("no smsg message waiting. rc = %d", rc));
|
|
continue;
|
|
}
|
|
|
|
if (OPAL_UNLIKELY(0 == data_ptr)) {
|
|
BTL_ERROR(("null data ptr!"));
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
count++;
|
|
|
|
BTL_VERBOSE(("got smsg fragment. tag = %d\n", tag));
|
|
|
|
switch (tag) {
|
|
case MCA_BTL_UGNI_TAG_SEND:
|
|
hdr = (mca_btl_ugni_frag_hdr_t *) data_ptr;
|
|
|
|
BTL_VERBOSE(("received smsg fragment. hdr = {len = %u, tag = %d}",
|
|
(unsigned int) hdr->len, hdr->tag));
|
|
|
|
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
|
frag.base.des_dst = frag.segments;
|
|
frag.base.des_dst_cnt = 1;
|
|
|
|
frag.segments[0].seg_addr.pval = (void *)(data_ptr + sizeof (*hdr));
|
|
frag.segments[0].seg_len = hdr->len;
|
|
|
|
reg->cbfunc(&ep->btl->super, hdr->tag, &(frag.base), reg->cbdata);
|
|
|
|
break;
|
|
case MCA_BTL_UGNI_TAG_DISCONNECT:
|
|
/* remote endpoint has disconnected */
|
|
rc = GNI_SmsgRelease (ep->common->ep_handle);
|
|
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
|
BTL_ERROR(("Smsg release failed!"));
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
mca_btl_ugni_ep_disconnect (ep, false);
|
|
|
|
return count;
|
|
case MCA_BTL_UGNI_TAG_PUT_INIT:
|
|
segments = (mca_btl_base_segment_t *) data_ptr;
|
|
|
|
mca_btl_ugni_start_progress_reverse_get (ep, segments,
|
|
((void **)(segments + 2))[0]);
|
|
|
|
break;
|
|
case MCA_BTL_UGNI_TAG_PUT_COMPLETE:
|
|
mca_btl_ugni_post_frag_complete (((void **)data_ptr)[0], OMPI_SUCCESS);
|
|
|
|
break;
|
|
default:
|
|
BTL_ERROR(("unknown tag %d\n", tag));
|
|
break;
|
|
}
|
|
|
|
rc = GNI_SmsgRelease (ep->common->ep_handle);
|
|
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
|
BTL_ERROR(("Smsg release failed!"));
|
|
return OMPI_ERROR;
|
|
}
|
|
} while (tries--);
|
|
|
|
/* finished processing events */
|
|
return count;
|
|
}
|
|
|
|
static inline int
|
|
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *btl)
|
|
{
|
|
uint32_t remote_addr, remote_id;
|
|
uint64_t datagram_id;
|
|
mca_btl_base_endpoint_t *ep;
|
|
gni_ep_handle_t handle;
|
|
gni_post_state_t post_state;
|
|
int rc, count;
|
|
|
|
count = 0;
|
|
|
|
post_state = GNI_POST_PENDING;
|
|
rc = GNI_PostDataProbeById (btl->device->dev_handle, &datagram_id);
|
|
if (OPAL_LIKELY(GNI_RC_SUCCESS != rc)) {
|
|
return 0;
|
|
}
|
|
|
|
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
|
|
MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
|
|
handle = btl->wildcard_ep;
|
|
} else {
|
|
handle =
|
|
btl->endpoints[(uint32_t)(datagram_id & 0xffffffffull)]->common->ep_handle;
|
|
}
|
|
|
|
/* wait for the incoming datagram to complete (in case it isn't) */
|
|
rc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
|
|
&remote_addr, &remote_id);
|
|
if (GNI_RC_SUCCESS != rc) {
|
|
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", rc));
|
|
return ompi_common_rc_ugni_to_ompi (rc);
|
|
}
|
|
|
|
BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
|
|
"peer = %d", datagram_id, post_state, remote_id));
|
|
|
|
ep = btl->endpoints[remote_id];
|
|
|
|
OPAL_THREAD_LOCK(&ep->common->lock);
|
|
|
|
/* NTH: TODO -- error handling */
|
|
(void) mca_btl_ugni_ep_connect_progress (ep);
|
|
|
|
if (ep->smsgs_waiting && OMPI_COMMON_UGNI_CONNECTED == MCA_BTL_UGNI_EP_STATE(ep)) {
|
|
/* process messages waiting in the endpoint's smsg mailbox */
|
|
while ((rc = mca_btl_ugni_smsg_process (ep) > 0)) count += rc;
|
|
ep->smsgs_waiting = false;
|
|
}
|
|
|
|
OPAL_THREAD_UNLOCK(&ep->common->lock);
|
|
|
|
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
|
|
MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
|
|
mca_btl_ugni_wildcard_ep_post (btl);
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
static inline int
|
|
mca_btl_ugni_handle_smsg_overrun (mca_btl_ugni_module_t *btl)
|
|
{
|
|
gni_cq_entry_t event_data;
|
|
unsigned int ep_index;
|
|
int count, rc;
|
|
|
|
BTL_VERBOSE(("btl/ugni_component detect SMSG CQ overrun. "
|
|
"processing message backlog..."));
|
|
|
|
/* we don't know which endpoint lost an smsg completion. clear the
|
|
smsg cq and check all mailboxes */
|
|
|
|
/* clear out remote cq */
|
|
do {
|
|
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
|
|
} while (GNI_RC_SUCCESS == rc);
|
|
|
|
count = 0;
|
|
|
|
for (ep_index = 0 ; ep_index < btl->endpoint_count ; ++ep_index) {
|
|
mca_btl_base_endpoint_t *ep = btl->endpoints[ep_index];
|
|
|
|
if (NULL == ep || OMPI_COMMON_UGNI_CONNECTED != MCA_BTL_UGNI_EP_STATE(ep)) {
|
|
continue;
|
|
}
|
|
|
|
do {
|
|
/* clear out smsg mailbox */
|
|
rc = mca_btl_ugni_smsg_process (ep);
|
|
if (rc > 0)
|
|
count += rc;
|
|
} while (rc > 0);
|
|
}
|
|
|
|
return count;
|
|
}
|
|
|
|
static inline int
|
|
mca_btl_ugni_progress_smsg (mca_btl_ugni_module_t *btl)
|
|
{
|
|
mca_btl_base_endpoint_t *ep;
|
|
gni_cq_entry_t event_data;
|
|
int rc;
|
|
|
|
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
|
|
if (GNI_RC_NOT_DONE == rc) {
|
|
return 0;
|
|
}
|
|
|
|
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc || !GNI_CQ_STATUS_OK(event_data) ||
|
|
GNI_CQ_OVERRUN(event_data))) {
|
|
if (GNI_RC_ERROR_RESOURCE == rc ||
|
|
(GNI_RC_SUCCESS == rc && GNI_CQ_OVERRUN(event_data))) {
|
|
/* recover from smsg cq overrun */
|
|
return mca_btl_ugni_handle_smsg_overrun (btl);
|
|
}
|
|
|
|
BTL_ERROR(("unhandled error in GNI_CqGetEvent"));
|
|
|
|
/* unhandled error: crash */
|
|
assert (0);
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64
|
|
". ok = %d, type = %" PRIu64 "\n", (uint64_t) event_data,
|
|
GNI_CQ_GET_MSG_ID(event_data), GNI_CQ_STATUS_OK(event_data),
|
|
GNI_CQ_GET_TYPE(event_data)));
|
|
|
|
/* we could check the message type here but it seems to always be a POST */
|
|
|
|
ep = btl->endpoints[GNI_CQ_GET_MSG_ID(event_data)];
|
|
if (OPAL_UNLIKELY(OMPI_COMMON_UGNI_CONNECTED != MCA_BTL_UGNI_EP_STATE(ep))) {
|
|
/* due to the nature of datagrams we may get a smsg completion before
|
|
we get mailbox info from the peer */
|
|
BTL_VERBOSE(("event occurred on an unconnected endpoint! ep state = %d", MCA_BTL_UGNI_EP_STATE(ep)));
|
|
|
|
/* flag the endpoint as having messages waiting */
|
|
ep->smsgs_waiting = true;
|
|
return 0;
|
|
}
|
|
|
|
return mca_btl_ugni_smsg_process (ep);
|
|
}
|
|
|
|
static inline int
|
|
mca_btl_ugni_progress_bte (mca_btl_ugni_module_t *btl)
|
|
{
|
|
return ompi_common_ugni_process_completed_post (btl->device, btl->bte_local_cq);
|
|
}
|
|
|
|
static int
|
|
mca_btl_ugni_retry_failed (mca_btl_ugni_module_t *btl)
|
|
{
|
|
int count = opal_list_get_size (&btl->failed_frags);
|
|
opal_list_item_t *item;
|
|
|
|
while (count-- && NULL != (item = opal_list_remove_first (&btl->failed_frags))) {
|
|
mca_btl_ugni_post_frag_complete ((void *) item, OMPI_SUCCESS);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
mca_btl_ugni_component_progress (void)
|
|
{
|
|
mca_btl_ugni_module_t *btl;
|
|
unsigned int i;
|
|
int count;
|
|
|
|
count = ompi_common_ugni_progress ();
|
|
|
|
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
|
btl = mca_btl_ugni_component.modules + i;
|
|
|
|
mca_btl_ugni_retry_failed (btl);
|
|
|
|
count += mca_btl_ugni_progress_datagram (btl);
|
|
count += mca_btl_ugni_progress_smsg (btl);
|
|
count += mca_btl_ugni_progress_bte (btl);
|
|
}
|
|
|
|
return count;
|
|
}
|