/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2011      UT-Battelle, LLC. All rights reserved.
 * Copyright (c) 2014      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * Copyright (c) 2017      Intel, Inc. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/**
 * @file This file contains wrappers for uGNI functionality. These wrappers are thread-safe
 * and are intended to provide a way to measure different approaches to handling mutual
 * exclusion around the uGNI library (which is not thread safe). These functions are all
 * defined inline to limit the cost to non-threaded users.
*/
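
/*
 * Every wrapper in this file follows the same pattern: the caller packs its
 * arguments into a small stack-allocated struct and passes a device callback
 * to mca_btl_ugni_device_serialize(), which runs the callback with exclusive
 * access to the device. A minimal sketch of the pattern (my_unbind and
 * my_unbind_args_t are hypothetical names used for illustration only;
 * GNI_EpUnbind stands in for any uGNI call):
 *
 * @code
 * typedef struct my_unbind_args_t {
 *     gni_ep_handle_t ep_handle;
 * } my_unbind_args_t;
 *
 * static inline intptr_t my_unbind_device (mca_btl_ugni_device_t *device, void *arg)
 * {
 *     my_unbind_args_t *args = (my_unbind_args_t *) arg;
 *     // uGNI is not thread safe, so the library call happens here,
 *     // while this thread holds exclusive access to the device
 *     return GNI_EpUnbind (args->ep_handle);
 * }
 *
 * static inline int my_unbind (mca_btl_ugni_endpoint_handle_t *ep_handle)
 * {
 *     my_unbind_args_t args = {.ep_handle = ep_handle->gni_handle};
 *     return (int) mca_btl_ugni_device_serialize (ep_handle->device,
 *                                                 (mca_btl_ugni_device_serialize_fn_t) my_unbind_device,
 *                                                 &args);
 * }
 * @endcode
 */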

#if !defined(BTL_UGNI_DEVICE_H)
#define BTL_UGNI_DEVICE_H

#include "btl_ugni_endpoint.h"
#include "btl_ugni_frag.h"

/* helper functions */
/**
 * @brief Output an error message on a CQ or completion error.
 *
 * @param[in] grc        GNI error from GNI_CqGetEvent or GNI_GetCompleted
 * @param[in] event_data event data from GNI_CqGetEvent
 *
 * This is a small function to print an error message when an error
 * is detected on a CQ event.
 */
int mca_btl_ugni_event_fatal_error (gni_return_t grc, gni_cq_entry_t event_data);

/**
 * @brief Attempt to re-post an RDMA descriptor.
 *
 * @param[in] rdma_desc  RDMA descriptor that failed
 * @param[in] event_data CQ event data
 *
 * @returns OPAL_SUCCESS if the descriptor was re-posted
 * @returns OPAL_ERROR otherwise
 *
 * This function checks whether the error is recoverable and re-posts the
 * descriptor if possible. The device lock MUST be held when this
 * function is called.
 */
int mca_btl_ugni_device_handle_event_error (struct mca_btl_ugni_rdma_desc_t *rdma_desc, gni_cq_entry_t event_data);

typedef struct mca_btl_ugni_smsg_send_wtag_arg_t {
    gni_ep_handle_t ep_handle;
    void *hdr;
    size_t hdr_len;
    void *payload;
    size_t payload_len;
    uint32_t msg_id;
    int tag;
} mca_btl_ugni_smsg_send_wtag_arg_t;

static inline int mca_btl_ugni_smsg_send_wtag_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_smsg_send_wtag_arg_t *args = (mca_btl_ugni_smsg_send_wtag_arg_t *) arg;
    gni_return_t grc;

    grc = GNI_SmsgSendWTag (args->ep_handle, args->hdr, args->hdr_len, args->payload,
                            args->payload_len, args->msg_id, args->tag);
    device->dev_smsg_local_cq.active_operations += (GNI_RC_SUCCESS == grc);
    return grc;
}

typedef struct mca_btl_ugni_smsg_get_next_wtag_arg_t {
    gni_ep_handle_t ep_handle;
    uintptr_t *data_ptr;
    uint8_t *tag;
} mca_btl_ugni_smsg_get_next_wtag_arg_t;

static inline intptr_t mca_btl_ugni_smsg_get_next_wtag_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_smsg_get_next_wtag_arg_t *args = (mca_btl_ugni_smsg_get_next_wtag_arg_t *) arg;
    return GNI_SmsgGetNextWTag(args->ep_handle, (void **) args->data_ptr, args->tag);
}

static inline intptr_t mca_btl_ugni_smsg_release_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_endpoint_handle_t *ep_handle = (mca_btl_ugni_endpoint_handle_t *) arg;

    return GNI_SmsgRelease (ep_handle->gni_handle);
}

typedef struct mca_btl_ugni_cq_get_event_args_t {
    mca_btl_ugni_cq_t *cq;
    gni_cq_entry_t *event_data;
} mca_btl_ugni_cq_get_event_args_t;

static inline intptr_t mca_btl_ugni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_cq_get_event_args_t *args = (mca_btl_ugni_cq_get_event_args_t *) arg;
    gni_return_t rc;

    rc = GNI_CqGetEvent (args->cq->gni_handle, args->event_data);
    args->cq->active_operations -= (GNI_RC_NOT_DONE != rc);
    return rc;
}

static inline intptr_t mca_btl_ugni_cq_clear_device (mca_btl_ugni_device_t *device, void *arg)
{
    gni_cq_handle_t cq = (gni_cq_handle_t) (intptr_t) arg;
    gni_cq_entry_t event_data;
    int rc;

    do {
        rc = GNI_CqGetEvent (cq, &event_data);
    } while (GNI_RC_NOT_DONE != rc);

    return OPAL_SUCCESS;
}

typedef struct mca_btl_ugni_gni_cq_get_event_args_t {
    gni_cq_handle_t cq;
    gni_cq_entry_t *event_data;
} mca_btl_ugni_gni_cq_get_event_args_t;

static inline intptr_t mca_btl_ugni_gni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_gni_cq_get_event_args_t *args = (mca_btl_ugni_gni_cq_get_event_args_t *) arg;

    return GNI_CqGetEvent (args->cq, args->event_data);
}

typedef struct mca_btl_ugni_cq_get_completed_desc_arg_t {
    mca_btl_ugni_cq_t *cq;
    mca_btl_ugni_post_descriptor_t *post_desc;
    int count;
} mca_btl_ugni_cq_get_completed_desc_arg_t;

__opal_attribute_always_inline__
static inline int _mca_btl_ugni_repost_rdma_desc_device (mca_btl_ugni_device_t *device, mca_btl_ugni_rdma_desc_t *rdma_desc)
{
    mca_btl_ugni_post_descriptor_t *post_desc = &rdma_desc->btl_ugni_desc;
    int rc;

    if (post_desc->use_bte) {
        rc = GNI_PostRdma (rdma_desc->gni_handle, &post_desc->gni_desc);
    } else {
        rc = GNI_PostFma (rdma_desc->gni_handle, &post_desc->gni_desc);
    }

    return mca_btl_rc_ugni_to_opal (rc);
}
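
/**
 * @brief Reap up to @a count completed RDMA/FMA descriptors from a device CQ.
 *
 * @param[in]  device    uGNI device (the caller must hold the device)
 * @param[in]  cq        device-local completion queue to poll
 * @param[out] post_desc storage for completed descriptors needing further processing
 * @param[in]  count     maximum number of descriptors to copy out
 * @param[in]  block     if true, spin until at least one event is available
 *
 * @returns the number of descriptors copied to @a post_desc
 * @returns the result of mca_btl_ugni_event_fatal_error() on an unrecoverable CQ error
 *
 * Descriptors that need no further processing (no callback, no BTE, no error)
 * are returned to the free list without being copied out.
 */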
static inline intptr_t _mca_btl_ugni_cq_get_completed_desc_device (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq,
                                                                   mca_btl_ugni_post_descriptor_t *post_desc,
                                                                   const int count, bool block)
{
    mca_btl_ugni_rdma_desc_t *rdma_desc;
    gni_post_descriptor_t *desc;
    gni_cq_entry_t event_data;
    int rc, desc_index = 0;

    for (desc_index = 0 ; desc_index < count && cq->active_operations ; ) {
        int desc_rc = OPAL_SUCCESS;

        rc = GNI_CqGetEvent (cq->gni_handle, &event_data);
        if (GNI_RC_NOT_DONE == rc) {
            if (block) {
                /* try again */
                continue;
            }
            break;
        }

        block = false;

        rc = GNI_GetCompleted (cq->gni_handle, event_data, &desc);
        if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc && GNI_RC_TRANSACTION_ERROR != rc)) {
            return mca_btl_ugni_event_fatal_error (rc, event_data);
        }

        rdma_desc = MCA_BTL_UGNI_GNI_DESC_TO_RDMA_DESC(desc);

        if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data))) {
            desc_rc = mca_btl_ugni_device_handle_event_error (rdma_desc, event_data);
            if (OPAL_LIKELY(OPAL_SUCCESS == desc_rc)) {
                /* descriptor was re-posted */
                continue;
            }
        }

        /* copy back the descriptor only if additional processing is needed. more processing
         * is needed if a user callback is specified, the bte was in use, or an error occurred. */
        if (rdma_desc->btl_ugni_desc.cbfunc || rdma_desc->btl_ugni_desc.use_bte || OPAL_SUCCESS != desc_rc) {
            post_desc[desc_index] = rdma_desc->btl_ugni_desc;
            post_desc[desc_index++].rc = desc_rc;
        }

        /* return the descriptor while we have the lock. this is done so we can avoid using the
         * free list atomics (as both push and pop are done with the lock) */
        mca_btl_ugni_return_rdma_desc (rdma_desc);
        --cq->active_operations;
    }

    return desc_index;
}

static inline intptr_t mca_btl_ugni_cq_get_completed_desc_device (mca_btl_ugni_device_t *device, void *arg0)
{
    mca_btl_ugni_cq_get_completed_desc_arg_t *args = (mca_btl_ugni_cq_get_completed_desc_arg_t *) arg0;

    return _mca_btl_ugni_cq_get_completed_desc_device (device, args->cq, args->post_desc, args->count, false);
}

/* NTH: When posting FMA or RDMA descriptors it makes sense to try to clear out a completion
 * event after posting the descriptor. This probably gives us a couple of things:
 *  1) Good locality on the associated data structures (especially with FMA, which may
 *     complete fairly quickly).
 *  2) Since we are already holding the lock it could mean fewer attempts to
 *     lock the device over the course of the program.
 *
 * As far as I can tell there is no reason to try to clear out more than a couple of
 * completion events. The code has been written to allow us to easily modify the
 * number reaped if we determine that there is a benefit to clearing a different
 * number of events. */

/**
 * @brief Number of events to clear after posting a descriptor
 */
#define MCA_BTL_UGNI_DEVICE_REAP_COUNT 4

struct mca_btl_ugni_post_device_args_t {
    mca_btl_ugni_post_descriptor_t *desc;
    mca_btl_ugni_device_t *device;
    int count;
    mca_btl_ugni_post_descriptor_t completed[MCA_BTL_UGNI_DEVICE_REAP_COUNT];
};

static inline mca_btl_ugni_rdma_desc_t *
mca_btl_ugni_get_rdma_desc_device (mca_btl_ugni_device_t *device, struct mca_btl_ugni_post_device_args_t *args, bool use_bte)
{
    mca_btl_ugni_post_descriptor_t *desc = args->desc;
    mca_btl_ugni_rdma_desc_t *rdma_desc;

    args->device = device;
    args->count = 0;

    do {
        rdma_desc = mca_btl_ugni_alloc_rdma_desc (device, desc, use_bte);
        if (OPAL_LIKELY(NULL != rdma_desc)) {
            return rdma_desc;
        }

        if (OPAL_LIKELY(NULL == rdma_desc && !args->count)) {
            args->count = _mca_btl_ugni_cq_get_completed_desc_device (device, &device->dev_rdma_local_cq,
                                                                      args->completed, MCA_BTL_UGNI_DEVICE_REAP_COUNT,
                                                                      true);
            continue;
        }

        return NULL;
    } while (1);
}

static inline intptr_t mca_btl_ugni_post_fma_device (mca_btl_ugni_device_t *device, void *arg)
{
    struct mca_btl_ugni_post_device_args_t *args = (struct mca_btl_ugni_post_device_args_t *) arg;
    mca_btl_ugni_rdma_desc_t *rdma_desc;
    int rc;

    rdma_desc = mca_btl_ugni_get_rdma_desc_device (device, args, false);
    if (OPAL_UNLIKELY(NULL == rdma_desc)) {
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    }

    BTL_VERBOSE(("Posting FMA descriptor %p with op_type %d, amo %d, remote_addr 0x%lx, "
                 "length %lu", (void*)rdma_desc, rdma_desc->btl_ugni_desc.gni_desc.type, rdma_desc->btl_ugni_desc.gni_desc.amo_cmd,
                 rdma_desc->btl_ugni_desc.gni_desc.remote_addr, rdma_desc->btl_ugni_desc.gni_desc.length));

    rc = GNI_PostFma (rdma_desc->gni_handle, &rdma_desc->btl_ugni_desc.gni_desc);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        mca_btl_ugni_return_rdma_desc (rdma_desc);
        return mca_btl_rc_ugni_to_opal (rc);
    }

    ++device->dev_rdma_local_cq.active_operations;

    /* to improve bandwidth and latency it is ideal for all posting threads to also reap completions from
     * the rdma completion queue. there are two optimizations here: 1) for bandwidth we only want to
     * reap what is available now so more messages can be posted quickly, and 2) for latency (single
     * put/get before flushing) we want to ensure the operation is complete. To some degree this is
     * gaming the benchmark but it may benefit some application communication patterns without really
     * hurting others (in theory). */
    if (opal_using_threads ()) {
        int count = args->count;
        args->count += _mca_btl_ugni_cq_get_completed_desc_device (device, &device->dev_rdma_local_cq,
                                                                   args->completed + count,
                                                                   MCA_BTL_UGNI_DEVICE_REAP_COUNT - count,
                                                                   device->flushed);
        device->flushed = false;
    }

    return OPAL_SUCCESS;
}

static inline intptr_t mca_btl_ugni_post_rdma_device (mca_btl_ugni_device_t *device, void *arg)
{
    struct mca_btl_ugni_post_device_args_t *args = (struct mca_btl_ugni_post_device_args_t *) arg;
    mca_btl_ugni_rdma_desc_t *rdma_desc;
    int rc;

    rdma_desc = mca_btl_ugni_get_rdma_desc_device (device, args, true);
    if (OPAL_UNLIKELY(NULL == rdma_desc)) {
        return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
    }

    /* pick the appropriate CQ */
    rdma_desc->btl_ugni_desc.cq = mca_btl_ugni_component.progress_thread_enabled ? &device->dev_rdma_local_irq_cq :
        &device->dev_rdma_local_cq;

    BTL_VERBOSE(("Posting RDMA descriptor %p with op_type %d, amo %d, remote_addr 0x%lx, "
                 "length %lu", (void*)rdma_desc, rdma_desc->btl_ugni_desc.gni_desc.type, rdma_desc->btl_ugni_desc.gni_desc.amo_cmd,
                 rdma_desc->btl_ugni_desc.gni_desc.remote_addr, rdma_desc->btl_ugni_desc.gni_desc.length));

    rc = GNI_PostRdma (rdma_desc->gni_handle, &rdma_desc->btl_ugni_desc.gni_desc);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        mca_btl_ugni_return_rdma_desc (rdma_desc);
        return mca_btl_rc_ugni_to_opal (rc);
    }

    ++rdma_desc->btl_ugni_desc.cq->active_operations;

    /* to improve bandwidth and latency it is ideal for all posting threads to also reap completions from
     * the rdma completion queue. there are two optimizations here: 1) for bandwidth we only want to
     * reap what is available now so more messages can be posted quickly, and 2) for latency (single
     * put/get before flushing) we want to ensure the operation is complete. To some degree this is
     * gaming the benchmark but it may benefit some application communication patterns without really
     * hurting others (in theory). */
    if (opal_using_threads ()) {
        int count = args->count;
        args->count += _mca_btl_ugni_cq_get_completed_desc_device (device, &device->dev_rdma_local_cq,
                                                                   args->completed + count,
                                                                   MCA_BTL_UGNI_DEVICE_REAP_COUNT - count,
                                                                   device->flushed);
        device->flushed = false;
    }

    return OPAL_SUCCESS;
}

static inline intptr_t mca_btl_ugni_post_cqwrite_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
    mca_btl_ugni_rdma_desc_t *rdma_desc;
    int rc;

    desc->gni_desc.src_cq_hndl = device->dev_rdma_local_cq.gni_handle;

    rdma_desc = mca_btl_ugni_alloc_rdma_desc (device, desc, false);
    if (OPAL_UNLIKELY(NULL == rdma_desc)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    rc = GNI_PostCqWrite (rdma_desc->gni_handle, &rdma_desc->btl_ugni_desc.gni_desc);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        mca_btl_ugni_return_rdma_desc (rdma_desc);
    }

    return mca_btl_rc_ugni_to_opal (rc);
}

typedef struct mca_btl_ugni_get_datagram_args_t {
    mca_btl_ugni_module_t *ugni_module;
    gni_ep_handle_t *handle;
    mca_btl_base_endpoint_t **ep;
} mca_btl_ugni_get_datagram_args_t;

static inline intptr_t mca_btl_ugni_get_datagram_device (mca_btl_ugni_device_t *device, void *arg0)
{
    mca_btl_ugni_get_datagram_args_t *args = (mca_btl_ugni_get_datagram_args_t *) arg0;
    uint32_t remote_addr, remote_id;
    uint64_t datagram_id;
    gni_post_state_t post_state;
    gni_return_t grc;
    uint64_t data;

    grc = GNI_PostDataProbeById (device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        return 0;
    }

    data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);

    BTL_VERBOSE(("rc: %d, datagram_id: %" PRIx64 ", mask: %" PRIx64, grc, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));

    if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
        *(args->ep) = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&args->ugni_module->endpoints, data);
        *(args->handle) = (*args->ep)->smsg_ep_handle.gni_handle;
    } else {
        *(args->handle) = args->ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it hasn't already) */
    grc = GNI_EpPostDataWaitById (*args->handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return mca_btl_rc_ugni_to_opal (grc);
    }

    BTL_VERBOSE(("handled datagram completion. post_state: %d, remote_addr: %u, remote_id: %u, directed?: %d",
                 post_state, remote_addr, remote_id, (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID));

    return 1;
}

typedef struct mca_btl_ugni_reg_mem_args_t {
    mca_btl_ugni_module_t *ugni_module;
    void *base;
    size_t size;
    mca_btl_ugni_reg_t *ugni_reg;
    gni_cq_handle_t cq;
    int flags;
} mca_btl_ugni_reg_mem_args_t;

static intptr_t mca_btl_ugni_reg_mem_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_reg_mem_args_t *args = (mca_btl_ugni_reg_mem_args_t *) arg;
    gni_return_t rc;

    rc = GNI_MemRegister (device->dev_handle, (uint64_t) args->base, args->size, args->cq,
                          args->flags, -1, &args->ugni_reg->handle.gni_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    return OPAL_SUCCESS;
}

typedef struct mca_btl_ugni_dereg_mem_arg_t {
    mca_btl_ugni_module_t *ugni_module;
    mca_btl_ugni_reg_t *ugni_reg;
} mca_btl_ugni_dereg_mem_arg_t;

static intptr_t mca_btl_ugni_dereg_mem_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_dereg_mem_arg_t *args = (mca_btl_ugni_dereg_mem_arg_t *) arg;
    gni_return_t rc;

    rc = GNI_MemDeregister (device->dev_handle, &args->ugni_reg->handle.gni_handle);
    return mca_btl_rc_ugni_to_opal (rc);
}

/* multi-thread safe interface to uGNI */

static inline int mca_btl_ugni_endpoint_smsg_send_wtag (mca_btl_base_endpoint_t *endpoint, void *hdr, size_t hdr_len,
                                                        void *payload, size_t payload_len, uint32_t msg_id, int tag)
{
    mca_btl_ugni_smsg_send_wtag_arg_t args = {.ep_handle = endpoint->smsg_ep_handle.gni_handle,
                                              .hdr = hdr, .hdr_len = hdr_len, .payload = payload,
                                              .payload_len = payload_len, .msg_id = msg_id,
                                              .tag = tag};
    mca_btl_ugni_device_t *device = endpoint->smsg_ep_handle.device;
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_send_wtag_device, &args);
}
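
/* A hedged usage sketch for the send path above. The header, payload, msg_id,
 * and tag values are assumptions for illustration and are not defined in this
 * file. Note that the device callback returns the raw gni_return_t, so callers
 * check GNI return codes here rather than OPAL ones. */
/*
 * @code
 * int rc = mca_btl_ugni_endpoint_smsg_send_wtag (endpoint, &hdr, sizeof (hdr),
 *                                                payload, payload_len, msg_id, tag);
 * if (GNI_RC_NOT_DONE == rc) {
 *     // no SMSG mailbox credits available; progress the local SMSG CQ and retry
 * }
 * @endcode
 */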

static inline int mca_btl_ugni_smsg_get_next_wtag (mca_btl_ugni_endpoint_handle_t *ep_handle, uintptr_t *data_ptr, uint8_t *tag)
{
    mca_btl_ugni_device_t *device = ep_handle->device;
    mca_btl_ugni_smsg_get_next_wtag_arg_t args = {.ep_handle = ep_handle->gni_handle, .data_ptr = data_ptr, .tag = tag};

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_get_next_wtag_device, &args);
}

static inline int mca_btl_ugni_smsg_release (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    mca_btl_ugni_device_t *device = ep_handle->device;

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_release_device, ep_handle);
}

static inline void mca_btl_ugni_cq_clear (mca_btl_ugni_device_t *device, gni_cq_handle_t cq)
{
    (void) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_clear_device, (void *) (intptr_t) cq);
}

static inline int mca_btl_ugni_cq_get_event (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq, gni_cq_entry_t *event_data)
{
    mca_btl_ugni_cq_get_event_args_t args = {.cq = cq, .event_data = event_data};
    /* NTH: normally there would be a check for any outstanding CQ operations but there seems
     * to be a reason to check the local SMSG completion queue anyway. since this function
     * only handles the SMSG local completion queue, not checking here should be fine and
     * should not impact performance. */
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_event_device, &args);
}

static inline int mca_btl_ugni_gni_cq_get_event (mca_btl_ugni_device_t *device, gni_cq_handle_t cq, gni_cq_entry_t *event_data)
{
    mca_btl_ugni_gni_cq_get_event_args_t args = {.cq = cq, .event_data = event_data};
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_gni_cq_get_event_device, &args);
}

__opal_attribute_always_inline__
static inline int mca_btl_ugni_endpoint_post (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc,
                                              mca_btl_ugni_device_serialize_fn_t post_fn)
{
    struct mca_btl_ugni_post_device_args_t args = {.desc = desc};
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
    int rc;

    /* use serialize_any as it is responsible for binding devices to threads (if enabled). this generally
     * gives better performance as it reduces contention on any individual device. */
    rc = mca_btl_ugni_device_serialize_any (ugni_module, post_fn, &args);
    if (args.count) {
        mca_btl_ugni_handle_rdma_completions (ugni_module, args.device, args.completed, args.count);
    }

    return rc;
}

__opal_attribute_always_inline__
static inline int mca_btl_ugni_endpoint_post_fma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    return mca_btl_ugni_endpoint_post (endpoint, desc, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_fma_device);
}

__opal_attribute_always_inline__
static inline int mca_btl_ugni_endpoint_post_rdma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    return mca_btl_ugni_endpoint_post (endpoint, desc, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_rdma_device);
}
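
/* A hedged usage sketch: callers typically post small transfers with FMA and
 * large ones over the BTE via RDMA. The fma_limit cutoff shown is an
 * assumption for illustration only; the component's real threshold lives
 * elsewhere. Any descriptors reaped while posting are handled inside
 * mca_btl_ugni_endpoint_post(). */
/*
 * @code
 * int rc = (size <= fma_limit) ? mca_btl_ugni_endpoint_post_fma (endpoint, desc)
 *                              : mca_btl_ugni_endpoint_post_rdma (endpoint, desc);
 * if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc) {
 *     // no rdma descriptors available; queue the operation and retry on progress
 * }
 * @endcode
 */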

static inline int mca_btl_ugni_endpoint_post_cqwrite (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
    mca_btl_ugni_device_t *device = ugni_module->devices;
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_cqwrite_device, desc);
}

__opal_attribute_always_inline__
static inline int mca_btl_ugni_cq_get_completed_desc (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq,
                                                      mca_btl_ugni_post_descriptor_t *post_desc,
                                                      int count)
{
    mca_btl_ugni_cq_get_completed_desc_arg_t args = {.cq = cq, .post_desc = post_desc, .count = count};
    if (0 == cq->active_operations) {
        return 0;
    }

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_completed_desc_device, &args);
}

static inline int mca_btl_ugni_get_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device, gni_ep_handle_t *gni_handle,
                                             mca_btl_base_endpoint_t **ep)
{
    mca_btl_ugni_get_datagram_args_t args = {.ugni_module = ugni_module, .ep = ep, .handle = gni_handle};
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_get_datagram_device, &args);
}

static inline int mca_btl_ugni_reg_mem (mca_btl_ugni_module_t *ugni_module, void *base, size_t size, mca_btl_ugni_reg_t *ugni_reg,
                                        gni_cq_handle_t cq, int flags)
{
    mca_btl_ugni_reg_mem_args_t args = {.ugni_module = ugni_module, .base = base, .size = size,
                                        .ugni_reg = ugni_reg, .cq = cq, .flags = flags};
    mca_btl_ugni_device_t *device = ugni_module->devices;
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_reg_mem_device, &args);
}
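
/* A hedged usage sketch pairing registration with deregistration. Passing NULL
 * for the destination CQ is valid when no remote completion notifications are
 * needed; base, size, and reg are assumptions for illustration. */
/*
 * @code
 * rc = mca_btl_ugni_reg_mem (ugni_module, base, size, reg, NULL, GNI_MEM_READWRITE);
 * if (OPAL_SUCCESS == rc) {
 *     // ... use reg->handle in FMA/RDMA descriptors ...
 *     rc = mca_btl_ugni_dereg_mem (ugni_module, reg);
 * }
 * @endcode
 */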

static inline int mca_btl_ugni_dereg_mem (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_reg_t *ugni_reg)
{
    mca_btl_ugni_dereg_mem_arg_t args = {.ugni_module = ugni_module, .ugni_reg = ugni_reg};
    mca_btl_ugni_device_t *device = ugni_module->devices;
    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_dereg_mem_device, &args);
}

#endif /* BTL_UGNI_DEVICE_H */