1
1

btl/ugni: update for BTL 3.0 interface

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-01-05 16:03:15 -07:00 коммит произвёл Nathan Hjelm
родитель 4972d97b8b
Коммит 655604f509
15 изменённых файлов: 508 добавлений и 514 удалений

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -33,6 +33,7 @@
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/class/opal_hash_table.h"
#include "opal/class/ompi_free_list.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/common/ugni/common_ugni.h"
#include <errno.h>
@ -80,11 +81,16 @@ typedef struct mca_btl_ugni_module_t {
opal_mutex_t eager_get_pending_lock;
opal_list_t eager_get_pending;
opal_mutex_t pending_descriptors_lock;
opal_list_t pending_descriptors;
ompi_free_list_t post_descriptors;
mca_mpool_base_module_t *smsg_mpool;
ompi_free_list_t smsg_mboxes;
gni_ep_handle_t wildcard_ep;
gni_ep_handle_t local_ep;
struct mca_btl_base_endpoint_t *local_ep;
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
@ -126,7 +132,7 @@ typedef struct mca_btl_ugni_module_t {
typedef struct mca_btl_ugni_component_t {
/* base BTL component */
mca_btl_base_component_2_0_0_t super;
mca_btl_base_component_3_0_0_t super;
/* maximum supported btls. hardcoded to 1 for now */
uint32_t ugni_max_btls;
@ -143,8 +149,6 @@ typedef struct mca_btl_ugni_component_t {
/* After this message size switch to BTE protocols */
size_t ugni_fma_limit;
/* Switch to put when trying to GET at or above this size */
size_t ugni_get_limit;
/* Switch to get when sending above this size */
size_t ugni_smsg_limit;
@ -267,33 +271,15 @@ mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
uint32_t flags, mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t **descriptor);
/**
* Initiate a get operation.
*
* location: btl_ugni_get.c
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
int
mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *des);
int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/**
* Initiate a put operation.
*
* location: btl_ugni_put.c
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
int
mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *des);
int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
int mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
@ -302,9 +288,14 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
uint8_t order, size_t size, uint32_t flags);
struct mca_btl_base_registration_handle_t {
/** uGNI memory handle */
gni_mem_handle_t gni_handle;
};
typedef struct mca_btl_ugni_reg_t {
mca_mpool_base_registration_t base;
gni_mem_handle_t memory_hdl;
mca_btl_base_registration_handle_t handle;
} mca_btl_ugni_reg_t;
/* Global structures */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
@ -61,10 +61,10 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
}
for (i = 0 ; i < nprocs ; ++i) {
struct opal_proc_t *ompi_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(ompi_proc->proc_name);
struct opal_proc_t *opal_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) {
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
ugni_module->nlocal_procs++;
/* Do not use uGNI to communicate with local procs unless we are adding more ranks.
@ -75,12 +75,17 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
}
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, ompi_proc);
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("btl/ugni error initializing endpoint"));
return rc;
}
/* go ahead and connect the local endpoint for RDMA/CQ write */
if (opal_proc == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
@ -138,26 +143,6 @@ int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
BTL_ERROR(("error creating remote SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_EpCreate (ugni_module->device->dev_handle, ugni_module->rdma_local_cq,
&ugni_module->local_ep);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating local ugni endpoint"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_EpBind (ugni_module->local_ep,
ugni_module->device->dev_addr,
getpid());
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error binding local ugni endpoint"));
return opal_common_rc_ugni_to_opal (rc);
}
}
rc = mca_btl_ugni_setup_mpools (ugni_module);
@ -222,8 +207,8 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
}
for (i = 0 ; i < nprocs ; ++i) {
struct opal_proc_t *ompi_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(ompi_proc->proc_name);
struct opal_proc_t *opal_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
mca_btl_base_endpoint_t *ep = NULL;
/* lookup this proc in the hash table */
@ -257,7 +242,7 @@ static int ugni_reg_rdma_mem (void *reg_data, void *base, size_t size,
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, NULL, GNI_MEM_READWRITE | GNI_MEM_RELAXED_PI_ORDERING,
-1, &(ugni_reg->memory_hdl));
-1, &(ugni_reg->handle.gni_handle));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
@ -280,7 +265,7 @@ static int ugni_reg_smsg_mem (void *reg_data, void *base, size_t size,
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, ugni_module->smsg_remote_cq, GNI_MEM_READWRITE, -1,
&(ugni_reg->memory_hdl));
&(ugni_reg->handle.gni_handle));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return opal_common_rc_ugni_to_opal (rc);
}
@ -293,7 +278,7 @@ ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
gni_return_t rc;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->memory_hdl);
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->handle.gni_handle);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
return OPAL_ERROR;
@ -470,6 +455,15 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
return rc;
}
rc = ompi_free_list_init_new (&ugni_module->post_descriptors,
sizeof (mca_btl_ugni_post_descriptor_t),
8, OBJ_CLASS(mca_btl_ugni_post_descriptor_t),
0, 0, 0, -1, 256, NULL);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating post descriptor free list"));
return rc;
}
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -52,6 +52,7 @@ static int
btl_ugni_component_register(void)
{
mca_base_var_enum_t *new_enum;
gni_nic_device_t device_type;
int rc;
(void) mca_base_var_group_component_register(&mca_btl_ugni_component.super.btl_version,
@ -139,15 +140,6 @@ btl_ugni_component_register(void)
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_fma_limit);
mca_btl_ugni_component.ugni_get_limit = 1 * 1024 * 1024;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"get_limit", "Maximum size message that "
"will be sent using a get protocol "
"(default 1M)", MCA_BASE_VAR_TYPE_INT,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.ugni_get_limit);
mca_btl_ugni_component.rdma_max_retries = 16;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"rdma_max_retries", NULL, MCA_BASE_VAR_TYPE_INT,
@ -222,13 +214,24 @@ btl_ugni_component_register(void)
mca_btl_ugni_module.super.btl_max_send_size = 8 * 1024;
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 8 * 1024;
mca_btl_ugni_module.super.btl_get_limit = 1 * 1024 * 1024;
/* determine if there are get alignment restrictions */
GNI_GetDeviceType (&device_type);
if (GNI_DEVICE_GEMINI == device_type) {
mca_btl_ugni_module.super.btl_get_alignment = 4;
} else {
mca_btl_ugni_module.super.btl_get_alignment = 0;
}
/* threshold for put */
mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 8 * 1024;
mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_ugni_module.super.btl_seg_size = sizeof (mca_btl_ugni_segment_t);
mca_btl_ugni_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */
@ -439,89 +442,110 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
return count;
}
static inline int
mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
#if OPAL_ENABLE_DEBUG
static inline void btl_ugni_dump_post_desc (mca_btl_ugni_post_descriptor_t *desc)
{
opal_common_ugni_post_desc_t *desc;
mca_btl_ugni_base_frag_t *frag;
fprintf (stderr, "desc->desc.base.post_id = %" PRIx64 "\n", desc->desc.base.post_id);
fprintf (stderr, "desc->desc.base.status = %" PRIx64 "\n", desc->desc.base.status);
fprintf (stderr, "desc->desc.base.cq_mode_complete = %hu\n", desc->desc.base.cq_mode_complete);
fprintf (stderr, "desc->desc.base.type = %d\n", desc->desc.base.type);
fprintf (stderr, "desc->desc.base.cq_mode = %hu\n", desc->desc.base.cq_mode);
fprintf (stderr, "desc->desc.base.dlvr_mode = %hu\n", desc->desc.base.dlvr_mode);
fprintf (stderr, "desc->desc.base.local_addr = %" PRIx64 "\n", desc->desc.base.local_addr);
fprintf (stderr, "desc->desc.base.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.local_mem_hndl.qword1,
desc->desc.base.local_mem_hndl.qword2);
fprintf (stderr, "desc->desc.base.remote_addr = %" PRIx64 "\n", desc->desc.base.remote_addr);
fprintf (stderr, "desc->desc.base.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.remote_mem_hndl.qword1,
desc->desc.base.remote_mem_hndl.qword2);
fprintf (stderr, "desc->desc.base.length = %" PRIu64 "\n", desc->desc.base.length);
fprintf (stderr, "desc->desc.base.rdma_mode = %hu\n", desc->desc.base.rdma_mode);
fprintf (stderr, "desc->desc.base.amo_cmd = %d\n", desc->desc.base.amo_cmd);
}
#endif
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
{
mca_btl_ugni_post_descriptor_t *post_desc = NULL;
gni_cq_entry_t event_data = 0;
gni_post_descriptor_t *desc;
uint32_t recoverable = 1;
gni_return_t rc;
gni_return_t grc;
gni_cq_handle_t the_cq;
the_cq = (which_cq == 0) ? ugni_module->rdma_local_cq : ugni_module->rdma_local_irq_cq;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqGetEvent (the_cq, &event_data);
if (GNI_RC_NOT_DONE == rc) {
grc = GNI_CqGetEvent (the_cq, &event_data);
if (GNI_RC_NOT_DONE == grc) {
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return 0;
}
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != rc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != grc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
/* TODO -- need to handle overrun -- how do we do this without an event?
will the event eventually come back? Ask Cray */
BTL_ERROR(("unhandled post error! ugni rc = %d %s", rc,gni_err_str[rc]));
BTL_ERROR(("unhandled post error! ugni rc = %d %s", grc, gni_err_str[grc]));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return opal_common_rc_ugni_to_opal (rc);
return opal_common_rc_ugni_to_opal (grc);
}
rc = GNI_GetCompleted (the_cq, event_data, (gni_post_descriptor_t **) &desc);
grc = GNI_GetCompleted (the_cq, event_data, &desc);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc && GNI_RC_TRANSACTION_ERROR != rc)) {
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[rc]));
return opal_common_rc_ugni_to_opal (rc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc && GNI_RC_TRANSACTION_ERROR != grc)) {
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[grc]));
return opal_common_rc_ugni_to_opal (grc);
}
frag = MCA_BTL_UGNI_DESC_TO_FRAG(desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc || !GNI_CQ_STATUS_OK(event_data))) {
char buffer[1024];
post_desc = MCA_BTL_UGNI_DESC_TO_PDESC(desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc || !GNI_CQ_STATUS_OK(event_data))) {
(void) GNI_CqErrorRecoverable (event_data, &recoverable);
GNI_CqErrorStr(event_data,buffer,sizeof(buffer));
if (OPAL_UNLIKELY(++frag->post_desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
if (OPAL_UNLIKELY(++post_desc->desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
!recoverable)) {
char char_buffer[1024];
GNI_CqErrorStr (event_data, char_buffer, 1024);
/* give up */
BTL_ERROR(("giving up on frag %p type %d CQE error %s", (void *) frag, frag->post_desc.base.type, buffer));
mca_btl_ugni_frag_complete (frag, OPAL_ERROR);
BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc,
recoverable, char_buffer));
#if OPAL_ENABLE_DEBUG
btl_ugni_dump_post_desc (post_desc);
#endif
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, OPAL_ERROR);
return OPAL_ERROR;
}
/* repost transaction */
mca_btl_ugni_repost (frag);
mca_btl_ugni_repost (ugni_module, post_desc);
return 0;
}
BTL_VERBOSE(("RDMA/FMA complete for frag %p", (void *) frag));
mca_btl_ugni_frag_complete (frag, opal_common_rc_ugni_to_opal (rc));
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, opal_common_rc_ugni_to_opal (grc));
return 1;
}
static inline int
mca_btl_ugni_retry_failed (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module)
{
int count = opal_list_get_size (&ugni_module->failed_frags);
int count = opal_list_get_size (&ugni_module->pending_descriptors);
int i;
for (i = 0 ; i < count ; ++i) {
OPAL_THREAD_LOCK(&ugni_module->failed_frags_lock);
mca_btl_ugni_base_frag_t *frag =
(mca_btl_ugni_base_frag_t *) opal_list_remove_first (&ugni_module->failed_frags);
OPAL_THREAD_UNLOCK(&ugni_module->failed_frags_lock);
if (NULL == frag) {
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
mca_btl_ugni_post_descriptor_t *post_desc =
(mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&ugni_module->pending_descriptors);
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
if (OPAL_SUCCESS != mca_btl_ugni_repost (ugni_module, post_desc)) {
break;
}
mca_btl_ugni_repost (frag);
}
return count;
return i;
}
static inline int
@ -571,7 +595,6 @@ static int mca_btl_ugni_component_progress (void)
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
ugni_module = mca_btl_ugni_component.modules + i;
mca_btl_ugni_retry_failed (ugni_module);
mca_btl_ugni_progress_wait_list (ugni_module);
count += mca_btl_ugni_progress_datagram (ugni_module);
@ -581,6 +604,9 @@ static int mca_btl_ugni_component_progress (void)
if (mca_btl_ugni_component.progress_thread_enabled) {
count += mca_btl_ugni_progress_rdma (ugni_module, 1);
}
/* post pending after progressing rdma */
mca_btl_ugni_post_pending (ugni_module);
}
return count;

Просмотреть файл

@ -198,7 +198,7 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
return OPAL_SUCCESS;
}
if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
if (MCA_BTL_UGNI_EP_STATE_RDMA >= ep->state) {
rc = mca_btl_ugni_ep_connect_start (ep);
if (OPAL_SUCCESS != rc) {
return rc;

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -16,7 +16,7 @@
static inline void mca_btl_ugni_base_frag_constructor (mca_btl_ugni_base_frag_t *frag)
{
memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base));
frag->segments[0].base.seg_addr.pval = frag->base.super.ptr;
frag->segments[0].seg_addr.pval = frag->base.super.ptr;
}
static inline void mca_btl_ugni_eager_frag_constructor (mca_btl_ugni_base_frag_t *frag)
@ -26,7 +26,7 @@ static inline void mca_btl_ugni_eager_frag_constructor (mca_btl_ugni_base_frag_t
mca_btl_ugni_base_frag_constructor (frag);
frag->segments[0].memory_handle = reg->memory_hdl;
frag->memory_handle = reg->handle;
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_frag_t, mca_btl_base_descriptor_t,
@ -38,6 +38,9 @@ OBJ_CLASS_INSTANCE(mca_btl_ugni_rdma_frag_t, mca_btl_base_descriptor_t,
OBJ_CLASS_INSTANCE(mca_btl_ugni_eager_frag_t, mca_btl_base_descriptor_t,
mca_btl_ugni_eager_frag_constructor, NULL);
OBJ_CLASS_INSTANCE(mca_btl_ugni_post_descriptor_t, ompi_free_list_item_t,
NULL, NULL);
void mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, mca_btl_ugni_module_t *ugni_module)
{
frag->msg_id = opal_pointer_array_add (&ugni_module->pending_smsg_frags_bb, (void *) frag);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
@ -19,13 +19,6 @@
#include "btl_ugni.h"
#include "btl_ugni_endpoint.h"
typedef struct mca_btl_ugni_segment_t {
mca_btl_base_segment_t base;
gni_mem_handle_t memory_handle;
uint8_t extra_bytes[3];
uint8_t extra_byte_count;
} mca_btl_ugni_segment_t;
typedef struct mca_btl_ugni_send_frag_hdr_t {
uint32_t lag;
} mca_btl_ugni_send_frag_hdr_t;
@ -41,7 +34,9 @@ typedef struct mca_btl_ugni_rdma_frag_hdr_t {
typedef struct mca_btl_ugni_eager_frag_hdr_t {
mca_btl_ugni_send_frag_hdr_t send;
mca_btl_ugni_segment_t src_seg;
uint32_t size;
uint64_t address;
mca_btl_base_registration_handle_t memory_handle;
void *ctx;
} mca_btl_ugni_eager_frag_hdr_t;
@ -59,29 +54,28 @@ typedef union mca_btl_ugni_frag_hdr_t {
} mca_btl_ugni_frag_hdr_t;
enum {
MCA_BTL_UGNI_FRAG_BUFFERED = 1, /* frag data is buffered */
MCA_BTL_UGNI_FRAG_COMPLETE = 2, /* smsg complete for frag */
MCA_BTL_UGNI_FRAG_EAGER = 4, /* eager get frag */
MCA_BTL_UGNI_FRAG_IGNORE = 8, /* ignore local smsg completion */
MCA_BTL_UGNI_FRAG_SMSG_COMPLETE = 16 /* SMSG has completed for this message */
MCA_BTL_UGNI_FRAG_BUFFERED = 1, /* frag data is buffered */
MCA_BTL_UGNI_FRAG_COMPLETE = 2, /* smsg complete for frag */
MCA_BTL_UGNI_FRAG_EAGER = 4, /* eager get frag */
MCA_BTL_UGNI_FRAG_IGNORE = 8, /* ignore local smsg completion */
MCA_BTL_UGNI_FRAG_SMSG_COMPLETE = 16, /* SMSG has completed for this message */
MCA_BTL_UGNI_FRAG_RESPONSE = 32,
};
struct mca_btl_ugni_base_frag_t;
typedef void (*frag_cb_t) (struct mca_btl_ugni_base_frag_t *, int);
typedef struct mca_btl_ugni_base_frag_t {
mca_btl_base_descriptor_t base;
uint32_t msg_id;
uint16_t hdr_size;
uint16_t flags;
mca_btl_ugni_frag_hdr_t hdr;
mca_btl_ugni_segment_t segments[2];
mca_btl_base_segment_t segments[2];
opal_common_ugni_post_desc_t post_desc;
mca_btl_base_endpoint_t *endpoint;
mca_btl_ugni_reg_t *registration;
ompi_free_list_t *my_list;
frag_cb_t cbfunc;
mca_btl_base_registration_handle_t memory_handle;
} mca_btl_ugni_base_frag_t;
typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_smsg_frag_t;
@ -91,6 +85,58 @@ typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_eager_frag_t;
#define MCA_BTL_UGNI_DESC_TO_FRAG(desc) \
((mca_btl_ugni_base_frag_t *)((uintptr_t) (desc) - offsetof (mca_btl_ugni_base_frag_t, post_desc)))
typedef struct mca_btl_ugni_post_descriptor_t {
ompi_free_list_item_t super;
opal_common_ugni_post_desc_t desc;
mca_btl_base_endpoint_t *endpoint;
mca_btl_base_registration_handle_t *local_handle;
mca_btl_base_rdma_completion_fn_t cbfunc;
void *cbdata;
void *ctx;
} mca_btl_ugni_post_descriptor_t;
OBJ_CLASS_DECLARATION(mca_btl_ugni_post_descriptor_t);
#define MCA_BTL_UGNI_DESC_TO_PDESC(desc) \
((mca_btl_ugni_post_descriptor_t *)((uintptr_t) (desc) - offsetof (mca_btl_ugni_post_descriptor_t, desc)))
static inline void mca_btl_ugni_alloc_post_descriptor (mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata,
mca_btl_ugni_post_descriptor_t **desc)
{
ompi_free_list_item_t *item = NULL;
OMPI_FREE_LIST_GET_MT(&endpoint->btl->post_descriptors, item);
*desc = (mca_btl_ugni_post_descriptor_t *) item;
if (NULL != item) {
(*desc)->cbfunc = cbfunc;
(*desc)->ctx = cbcontext;
(*desc)->cbdata = cbdata;
(*desc)->local_handle = local_handle;
(*desc)->endpoint = endpoint;
}
}
static inline void mca_btl_ugni_return_post_descriptor (mca_btl_ugni_module_t *module,
mca_btl_ugni_post_descriptor_t *desc)
{
OMPI_FREE_LIST_RETURN_MT(&module->post_descriptors, &desc->super);
}
static inline void mca_btl_ugni_post_desc_complete (mca_btl_ugni_module_t *module, mca_btl_ugni_post_descriptor_t *desc, int rc)
{
BTL_VERBOSE(("RDMA/FMA/ATOMIC operation complete for post descriptor %p. rc = %d", (void *) desc, rc));
if (NULL != desc->cbfunc) {
/* call the user's callback function */
desc->cbfunc (&module->super, desc->endpoint, (void *)(intptr_t) desc->desc.base.local_addr,
desc->local_handle, desc->ctx, desc->cbdata, rc);
}
/* the descriptor is no longer needed */
mca_btl_ugni_return_post_descriptor (module, desc);
}
OBJ_CLASS_DECLARATION(mca_btl_ugni_smsg_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_ugni_rdma_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_ugni_eager_frag_t);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -13,44 +13,31 @@
#include "btl_ugni_rdma.h"
#include "btl_ugni_smsg.h"
/**
* Initiate a get operation.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
int mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *des) {
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
mca_btl_ugni_segment_t *src_seg = (mca_btl_ugni_segment_t *) des->des_remote;
mca_btl_ugni_segment_t *dst_seg = (mca_btl_ugni_segment_t *) des->des_local;
size_t size = src_seg->base.seg_len - src_seg->extra_byte_count;
int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
bool check;
BTL_VERBOSE(("Using RDMA/FMA Get"));
/* Check if the get is aligned/sized on a multiple of 4 */
check = !!((remote_address | (uint64_t)(intptr_t) local_address | size) & (mca_btl_ugni_module.super.btl_get_alignment - 1));
if (OPAL_UNLIKELY(check || size > mca_btl_ugni_module.super.btl_get_limit)) {
BTL_VERBOSE(("RDMA/FMA Get not available due to size or alignment restrictions"));
/* notify the caller that get is not available */
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("Using RDMA/FMA Get from local address %p to remote address %" PRIx64,
local_address, remote_address));
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
(void) mca_btl_ugni_check_endpoint_state(endpoint);
/* Check if the get is aligned/sized on a multiple of 4 */
check = !!((des->des_remote->seg_addr.lval | des->des_local->seg_addr.lval | size) & 3);
if (OPAL_UNLIKELY(check || size > mca_btl_ugni_component.ugni_get_limit)) {
/* switch to put */
return OPAL_ERR_NOT_AVAILABLE;
}
if (src_seg->extra_byte_count) {
memmove ((char *) dst_seg->base.seg_addr.pval + size, src_seg->extra_bytes, src_seg->extra_byte_count);
src_seg->base.seg_len = size;
dst_seg->base.seg_len = size;
}
des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
return mca_btl_ugni_post (frag, true, dst_seg, src_seg);
return mca_btl_ugni_post (endpoint, true, size, local_address, remote_address, local_handle,
remote_handle, order, cbfunc, cbcontext, cbdata);
}
/* eager get */
@ -60,6 +47,8 @@ static void mca_btl_ugni_callback_eager_get_progress_pending (struct mca_btl_bas
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_base_frag_t *pending_frag, *frag = (mca_btl_ugni_base_frag_t *) desc;
memset (&frag->hdr, 0, sizeof (frag->hdr));
OPAL_THREAD_LOCK(&ugni_module->eager_get_pending_lock);
pending_frag = (mca_btl_ugni_base_frag_t *) opal_list_remove_first (&ugni_module->eager_get_pending);
OPAL_THREAD_UNLOCK(&ugni_module->eager_get_pending_lock);
@ -68,6 +57,8 @@ static void mca_btl_ugni_callback_eager_get_progress_pending (struct mca_btl_bas
/* copy the relevant data out of the pending fragment */
frag->endpoint = pending_frag->endpoint;
assert (frag != pending_frag);
/* start the next eager get using this fragment */
(void) mca_btl_ugni_start_eager_get (frag->endpoint, pending_frag->hdr.eager_ex, frag);
@ -80,39 +71,43 @@ static void mca_btl_ugni_callback_eager_get_progress_pending (struct mca_btl_bas
}
static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *desc, int rc)
void *local_address, mca_btl_base_registration_handle_t *local_handle,
void *context, void *cbdata, int status)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) desc;
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) context;
uint32_t len = frag->hdr.eager.send.lag & 0x00ffffff;
uint8_t tag = frag->hdr.eager.send.lag >> 24;
size_t payload_len = frag->hdr.eager.src_seg.base.seg_len;
size_t payload_len = frag->hdr.eager.size;
size_t hdr_len = len - payload_len;
mca_btl_active_message_callback_t *reg;
mca_btl_base_segment_t segs[2];
mca_btl_ugni_base_frag_t tmp;
int rc;
BTL_VERBOSE(("eager get for rem_ctx %p complete", frag->hdr.eager.ctx));
BTL_VERBOSE(("eager get for rem_ctx %p complete", frag->hdr.eager.ctx))
tmp.base.des_local = segs;
tmp.base.des_segments = segs;
if (hdr_len) {
tmp.base.des_local_count = 2;
tmp.base.des_segment_count = 2;
segs[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
segs[0].seg_len = hdr_len;
segs[1].seg_addr.pval = frag->segments[0].base.seg_addr.pval;
segs[1].seg_addr.pval = local_address;
segs[1].seg_len = payload_len;
} else {
tmp.base.des_local_count = 1;
tmp.base.des_segment_count = 1;
segs[0].seg_addr.pval = frag->segments[0].base.seg_addr.pval;
segs[0].seg_addr.pval = local_address;
segs[0].seg_len = payload_len;
}
reg = mca_btl_base_active_message_trigger + tag;
reg->cbfunc(&frag->endpoint->btl->super, tag, &(tmp.base), reg->cbdata);
/* fill in the response header */
frag->hdr.rdma.ctx = frag->hdr.eager.ctx;
frag->flags = MCA_BTL_UGNI_FRAG_RESPONSE;
/* once complete use this fragment for a pending eager get if any exist */
frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get_progress_pending;
@ -122,6 +117,7 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
NULL, 0, MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
if (OPAL_UNLIKELY(0 > rc)) {
/* queue fragment */
OPAL_THREAD_LOCK(&endpoint->lock);
if (false == endpoint->wait_listed) {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
@ -129,50 +125,50 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
endpoint->wait_listed = true;
}
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
}
}
int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
mca_btl_ugni_base_frag_t *frag)
{
mca_btl_ugni_module_t *ugni_module = ep->btl;
mca_btl_ugni_module_t *ugni_module = endpoint->btl;
size_t size;
int rc;
BTL_VERBOSE(("starting eager get for remote ctx: %p", hdr.eager.ctx));
do {
if (NULL == frag) {
rc = MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(ep, frag);
/* try to allocate a registered buffer */
rc = MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(endpoint, frag);
if (OPAL_UNLIKELY(NULL == frag)) {
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(ep, frag);
/* no registered buffers available. try again later */
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(endpoint, frag);
/* not much can be done if a small fragment can not be allocated. abort! */
assert (NULL != frag);
frag->hdr.eager_ex = hdr;
break;
}
}
frag->hdr.eager_ex = hdr;
frag->flags = 0;
frag->base.des_flags = 0;
frag->hdr.eager_ex = hdr;
frag->segments[1] = hdr.eager.src_seg;
/* increase size to a multiple of 4 bytes (required for get) */
frag->segments[0].base.seg_len = frag->segments[1].base.seg_len =
(hdr.eager.src_seg.base.seg_len + 3) & ~3;
frag->base.des_local = &frag->segments[1].base;
/* increase size to a multiple of 4 bytes (required for get on Gemini) */
size = (hdr.eager.size + 3) & ~3;
/* set up callback for get completion */
frag->base.des_flags = MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get;
rc = mca_btl_ugni_post (frag, GNI_POST_RDMA_GET, frag->segments, frag->segments + 1);
/* start the get */
rc = mca_btl_ugni_post (endpoint, true, size, frag->base.super.ptr, hdr.eager.address,
&frag->memory_handle, &hdr.eager.memory_handle,
MCA_BTL_NO_ORDER, mca_btl_ugni_callback_eager_get, frag, NULL);
if (OPAL_UNLIKELY(OPAL_SUCCESS == rc)) {
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -27,35 +27,34 @@ mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
static int
mca_btl_ugni_module_finalize (struct mca_btl_base_module_t* btl);
static mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_dst (mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
opal_convertor_t *convertor, uint8_t order,
size_t reserve, size_t *size, uint32_t flags);
static struct mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags);
static mca_btl_base_registration_handle_t *
mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base,
size_t size, uint32_t flags);
static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle);
mca_btl_ugni_module_t mca_btl_ugni_module = {
.super = {
.btl_component = &mca_btl_ugni_component.super,
.btl_add_procs = mca_btl_ugni_add_procs,
.btl_del_procs = mca_btl_ugni_del_procs,
.btl_finalize = mca_btl_ugni_module_finalize,
.btl_alloc = mca_btl_ugni_alloc,
.btl_free = mca_btl_ugni_free,
.btl_prepare_src = mca_btl_ugni_prepare_src,
.btl_prepare_dst = mca_btl_ugni_prepare_dst,
.btl_send = mca_btl_ugni_send,
.btl_sendi = mca_btl_ugni_sendi,
.btl_put = mca_btl_ugni_put,
.btl_get = mca_btl_ugni_get,
.btl_component = &mca_btl_ugni_component.super,
.btl_add_procs = mca_btl_ugni_add_procs,
.btl_del_procs = mca_btl_ugni_del_procs,
.btl_finalize = mca_btl_ugni_module_finalize,
.btl_alloc = mca_btl_ugni_alloc,
.btl_free = mca_btl_ugni_free,
.btl_prepare_src = mca_btl_ugni_prepare_src,
.btl_send = mca_btl_ugni_send,
.btl_sendi = mca_btl_ugni_sendi,
.btl_put = mca_btl_ugni_put,
.btl_get = mca_btl_ugni_get,
.btl_register_mem = mca_btl_ugni_register_mem,
.btl_deregister_mem = mca_btl_ugni_deregister_mem,
}
};
@ -92,6 +91,9 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, ompi_free_list_t);
OBJ_CONSTRUCT(&ugni_module->pending_descriptors, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->post_descriptors, ompi_free_list_t);
ugni_module->device = dev;
dev->btl_ctx = (void *) ugni_module;
@ -204,7 +206,6 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
OBJ_DESTRUCT(&ugni_module->endpoints);
OBJ_DESTRUCT(&ugni_module->failed_frags);
OBJ_DESTRUCT(&ugni_module->eager_get_pending);
OBJ_DESTRUCT(&ugni_module->eager_get_pending_lock);
@ -250,13 +251,13 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
frag->base.des_flags = flags;
frag->base.order = order;
frag->base.des_local = &frag->segments[1].base;
frag->base.des_local_count = 1;
frag->base.des_segments = &frag->segments[1];
frag->base.des_segment_count = 1;
frag->segments[0].base.seg_addr.pval = NULL;
frag->segments[0].base.seg_len = 0;
frag->segments[1].base.seg_addr.pval = frag->base.super.ptr;
frag->segments[1].base.seg_len = size;
frag->segments[0].seg_addr.pval = NULL;
frag->segments[0].seg_len = 0;
frag->segments[1].seg_addr.pval = frag->base.super.ptr;
frag->segments[1].seg_len = size;
frag->flags = MCA_BTL_UGNI_FRAG_BUFFERED;
if (size > mca_btl_ugni_component.smsg_max_data) {
@ -267,7 +268,7 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
registration = (mca_btl_ugni_reg_t *) frag->base.super.registration;
frag->segments[1].memory_handle = registration->memory_hdl;
frag->hdr.eager.memory_handle = registration->handle;
} else {
frag->hdr_size = sizeof (frag->hdr.send);
}
@ -285,59 +286,36 @@ mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
static struct mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags)
{
if (OPAL_LIKELY(reserve)) {
return mca_btl_ugni_prepare_src_send (btl, endpoint, convertor,
order, reserve, size, flags);
} else {
return mca_btl_ugni_prepare_src_rdma (btl, endpoint, registration,
convertor, order, size, flags);
}
return mca_btl_ugni_prepare_src_send (btl, endpoint, convertor,
order, reserve, size, flags);
}
static mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_dst (mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
opal_convertor_t *convertor, uint8_t order,
size_t reserve, size_t *size, uint32_t flags)
static mca_btl_base_registration_handle_t *
mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base,
size_t size, uint32_t flags)
{
mca_btl_ugni_base_frag_t *frag;
void *data_ptr;
mca_btl_ugni_reg_t *reg;
int rc;
opal_convertor_get_current_pointer (convertor, &data_ptr);
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
if (OPAL_UNLIKELY(NULL == frag)) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0,
(mca_mpool_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return NULL;
}
/* always need to register the buffer for put/get (even for fma) */
if (NULL == registration) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
data_ptr, *size, 0,
&registration);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_frag_return (frag);
return NULL;
}
frag->registration = (mca_btl_ugni_reg_t*) registration;
}
frag->segments[0].memory_handle = ((mca_btl_ugni_reg_t *)registration)->memory_hdl;
frag->segments[0].base.seg_len = *size;
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->base.des_local = &frag->segments->base;
frag->base.des_local_count = 1;
frag->base.order = order;
frag->base.des_flags = flags;
return (struct mca_btl_base_descriptor_t *) frag;
return &reg->handle;
}
static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{
mca_btl_ugni_reg_t *reg =
(mca_btl_ugni_reg_t *)((intptr_t) handle - offsetof (mca_btl_ugni_reg_t, handle));
(void) btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -35,14 +35,14 @@ mca_btl_ugni_prepare_src_send_nodata (struct mca_btl_base_module_t *btl,
frag->hdr_size = reserve + sizeof (frag->hdr.send);
frag->segments[0].base.seg_addr.pval = frag->hdr.send_ex.pml_header;
frag->segments[0].base.seg_len = reserve;
frag->segments[0].seg_addr.pval = frag->hdr.send_ex.pml_header;
frag->segments[0].seg_len = reserve;
frag->segments[1].base.seg_addr.pval = NULL;
frag->segments[1].base.seg_len = 0;
frag->segments[1].seg_addr.pval = NULL;
frag->segments[1].seg_len = 0;
frag->base.des_local = &frag->segments->base;
frag->base.des_local_count = 1;
frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 1;
frag->base.order = order;
frag->base.des_flags = flags;
@ -84,22 +84,22 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
frag->flags = MCA_BTL_UGNI_FRAG_EAGER | MCA_BTL_UGNI_FRAG_IGNORE;
frag->registration = registration;
frag->segments[1].memory_handle = registration->memory_hdl;
frag->hdr.eager.memory_handle = registration->handle;;
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
frag->segments[0].base.seg_addr.pval = frag->hdr.eager_ex.pml_header;
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
} else {
frag->hdr_size = reserve + sizeof (frag->hdr.send);
frag->segments[0].base.seg_addr.pval = frag->hdr.send_ex.pml_header;
frag->segments[0].seg_addr.pval = frag->hdr.send_ex.pml_header;
}
frag->segments[0].base.seg_len = reserve;
frag->segments[0].seg_len = reserve;
frag->segments[1].base.seg_addr.pval = data_ptr;
frag->segments[1].base.seg_len = *size;
frag->segments[1].seg_addr.pval = data_ptr;
frag->segments[1].seg_len = *size;
frag->base.des_local = &frag->segments->base;
frag->base.des_local_count = 2;
frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 2;
frag->base.order = order;
frag->base.des_flags = flags;
@ -130,10 +130,9 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
registration = (mca_btl_ugni_reg_t *) frag->base.super.registration;
frag->segments[1].memory_handle = registration->memory_hdl;
frag->hdr.eager.memory_handle = registration->handle;
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
frag->segments[0].base.seg_addr.pval = frag->hdr.eager_ex.pml_header;
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
} else {
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
if (OPAL_UNLIKELY(NULL == frag)) {
@ -141,7 +140,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
}
frag->hdr_size = reserve + sizeof (frag->hdr.send);
frag->segments[0].base.seg_addr.pval = frag->hdr.send_ex.pml_header;
frag->segments[0].seg_addr.pval = frag->hdr.send_ex.pml_header;
}
frag->flags |= MCA_BTL_UGNI_FRAG_BUFFERED;
@ -155,13 +154,13 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
return NULL;
}
frag->segments[0].base.seg_len = reserve;
frag->segments[0].seg_len = reserve;
frag->segments[1].base.seg_addr.pval = frag->base.super.ptr;
frag->segments[1].base.seg_len = *size;
frag->segments[1].seg_addr.pval = frag->base.super.ptr;
frag->segments[1].seg_len = *size;
frag->base.des_local = &frag->segments->base;
frag->base.des_local_count = 2;
frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 2;
frag->base.order = order;
frag->base.des_flags = flags;
@ -197,66 +196,4 @@ mca_btl_ugni_prepare_src_send (struct mca_btl_base_module_t *btl,
}
}
static inline struct mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_src_rdma (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor,
uint8_t order, size_t *size,
uint32_t flags)
{
mca_btl_ugni_base_frag_t *frag;
void *data_ptr;
int rc;
opal_convertor_get_current_pointer (convertor, &data_ptr);
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
/*
* For medium message use FMA protocols and for large message
* use BTE protocols
*/
/* No need to register while using FMA Put (registration is
* non-null in get-- is this always true?) */
if (*size >= mca_btl_ugni_component.ugni_fma_limit || (flags & MCA_BTL_DES_FLAGS_GET)) {
if (NULL == registration) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, data_ptr, *size, 0,
(mca_mpool_base_registration_t **) &registration);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_frag_return (frag);
return NULL;
}
frag->registration = (mca_btl_ugni_reg_t *) registration;
}
frag->segments[0].memory_handle = ((mca_btl_ugni_reg_t *)registration)->memory_hdl;
} else {
memset ((void *) &frag->segments[0].memory_handle, 0,
sizeof (frag->segments[0].memory_handle));
}
if ((flags & MCA_BTL_DES_FLAGS_GET) && (*size & 0x3)) {
memmove (frag->segments[0].extra_bytes, (char *) data_ptr + (*size & ~0x3),
*size & 0x3);
frag->segments[0].extra_byte_count = *size & 0x3;
} else {
frag->segments[0].extra_byte_count = 0;
}
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segments[0].base.seg_len = *size;
frag->base.des_local = &frag->segments->base;
frag->base.des_local_count = 1;
frag->base.order = order;
frag->base.des_flags = flags;
return &frag->base;
}
#endif

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -88,7 +88,7 @@ static void *mca_btl_ugni_prog_thread_fn(void * data)
}
fn_exit:
return ret;
return (void *) (intptr_t) ret;
}
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t *btl)
@ -125,8 +125,6 @@ int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t *btl)
int mca_btl_ugni_kill_progress_thread(void)
{
int rc, ret=OPAL_SUCCESS;
gni_return_t status;
static mca_btl_ugni_base_frag_t cq_write_frag;
stop_progress_thread = 1;
@ -134,24 +132,14 @@ int mca_btl_ugni_kill_progress_thread(void)
* post a CQ to myself to wake my thread up
*/
cq_write_frag.post_desc.base.type = GNI_POST_CQWRITE;
cq_write_frag.post_desc.base.cqwrite_value = 0xdead; /* up to 48 bytes here, not used for now */
cq_write_frag.post_desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
cq_write_frag.post_desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
cq_write_frag.post_desc.base.src_cq_hndl = mca_btl_ugni_component.modules[0].rdma_local_cq;
cq_write_frag.post_desc.base.remote_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
cq_write_frag.post_desc.tries = 0;
cq_write_frag.cbfunc = NULL;
OPAL_THREAD_LOCK(&mca_btl_ugni_component.modules[0].device->dev_lock);
status = GNI_PostCqWrite(mca_btl_ugni_component.modules[0].local_ep,
&cq_write_frag.post_desc.base);
OPAL_THREAD_UNLOCK(&mca_btl_ugni_component.modules[0].device->dev_lock);
ret = mca_btl_ugni_post_cqwrite (mca_btl_ugni_component.modules[0].local_ep,
mca_btl_ugni_component.modules[0].rdma_local_cq,
mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl,
0xdead, NULL, NULL, NULL);
/*
* TODO: if error returned, need to kill off thread manually
*/
if (GNI_RC_SUCCESS != status) {
BTL_ERROR(("GNI_PostCqWrite returned error - %s",gni_err_str[status]));
ret = opal_common_rc_ugni_to_opal(status);
if (OPAL_SUCCESS != ret) {
goto fn_exit;
}
@ -178,19 +166,6 @@ int mca_btl_ugni_kill_progress_thread(void)
goto fn_exit;
}
/*
* destroy the local_ep
*/
OPAL_THREAD_LOCK(&mca_btl_ugni_component.modules[0].device->dev_lock);
status = GNI_EpDestroy (mca_btl_ugni_component.modules[0].local_ep);
OPAL_THREAD_UNLOCK(&mca_btl_ugni_component.modules[0].device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
BTL_ERROR(("GNI_EpDestroy returned error - %s", gni_err_str[status]));
ret = opal_common_rc_ugni_to_opal(status);
goto fn_exit;
}
fn_exit:
return ret;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -14,25 +14,17 @@
#include "btl_ugni_rdma.h"
/**
* Initiate a put operation.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
int mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *des) {
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
BTL_VERBOSE(("Using RDMA/FMA Put for frag %p", (void *) des));
int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
BTL_VERBOSE(("Using RDMA/FMA Put from local address %p to remote address %" PRIx64,
local_address, remote_address));
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
(void) mca_btl_ugni_check_endpoint_state(endpoint);
des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
return mca_btl_ugni_post (frag, false, (mca_btl_ugni_segment_t *) des->des_local,
(mca_btl_ugni_segment_t *) des->des_remote);
return mca_btl_ugni_post (endpoint, false, size, local_address, remote_address, local_handle,
remote_handle, order, cbfunc, cbcontext, cbdata);
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -20,107 +20,185 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
mca_btl_ugni_base_frag_t *frag);
static inline void init_gni_post_desc (mca_btl_ugni_base_frag_t *frag,
gni_post_type_t op_type,
uint64_t lcl_addr,
gni_mem_handle_t lcl_mdh,
uint64_t rem_addr,
gni_mem_handle_t rem_mdh,
uint64_t bufsize,
gni_cq_handle_t cq_hndl) {
frag->post_desc.base.type = op_type;
frag->post_desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
frag->post_desc.base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
frag->post_desc.base.local_addr = (uint64_t) lcl_addr;
frag->post_desc.base.local_mem_hndl = lcl_mdh;
frag->post_desc.base.remote_addr = (uint64_t) rem_addr;
frag->post_desc.base.remote_mem_hndl = rem_mdh;
frag->post_desc.base.length = bufsize;
frag->post_desc.base.rdma_mode = 0;
frag->post_desc.base.rdma_mode = 0;
frag->post_desc.base.src_cq_hndl = cq_hndl;
frag->post_desc.tries = 0;
static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
int order, gni_post_type_t op_type,
uint64_t lcl_addr,
gni_mem_handle_t lcl_mdh,
uint64_t rem_addr,
gni_mem_handle_t rem_mdh,
uint64_t bufsize,
gni_cq_handle_t cq_hndl) {
post_desc->base.type = op_type;
post_desc->base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
if (MCA_BTL_NO_ORDER == order) {
post_desc->base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
} else {
post_desc->base.dlvr_mode = GNI_DLVMODE_NO_ADAPT;
}
post_desc->base.local_addr = (uint64_t) lcl_addr;
post_desc->base.local_mem_hndl = lcl_mdh;
post_desc->base.remote_addr = (uint64_t) rem_addr;
post_desc->base.remote_mem_hndl = rem_mdh;
post_desc->base.length = bufsize;
post_desc->base.rdma_mode = 0;
post_desc->base.src_cq_hndl = cq_hndl;
post_desc->tries = 0;
}
static inline int mca_btl_ugni_post_fma (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoint, gni_post_type_t op_type,
size_t size, void *local_address, uint64_t remote_address,
mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle,
int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
gni_return_t rc;
mca_btl_ugni_post_descriptor_t *post_desc;
gni_return_t grc;
/* Post descriptor (CQ is ignored for FMA transactions) */
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len, 0);
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
rc = GNI_PostFma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("GNI_PostFma failed with gni rc: %d", rc));
/* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint
* is used. */
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
grc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ALIGNMENT_ERROR == grc) {
BTL_VERBOSE(("GNI_PostFma failed with an alignment error"));
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("GNI_PostFma failed with gni rc: %d", grc));
return OPAL_ERR_OUT_OF_RESOURCE;
}
return OPAL_SUCCESS;
}
static inline int mca_btl_ugni_post_bte (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
mca_btl_ugni_segment_t *lcl_seg, mca_btl_ugni_segment_t *rem_seg)
static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_post_type_t op_type,
size_t size, void *local_address, uint64_t remote_address,
mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle,
int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_cq_handle_t cq_handle = endpoint->btl->rdma_local_cq;
gni_return_t status;
/* Post descriptor */
if (mca_btl_ugni_component.progress_thread_enabled) {
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_irq_cq);
} else {
init_gni_post_desc (frag, op_type, lcl_seg->base.seg_addr.lval, lcl_seg->memory_handle,
rem_seg->base.seg_addr.lval, rem_seg->memory_handle, lcl_seg->base.seg_len,
frag->endpoint->btl->rdma_local_cq);
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
status = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (GNI_RC_SUCCESS != status) {
if (mca_btl_ugni_component.progress_thread_enabled) {
cq_handle = endpoint->btl->rdma_local_irq_cq;
}
/* Post descriptor */
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, cq_handle);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
status = GNI_PostRdma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ALIGNMENT_ERROR == status) {
BTL_VERBOSE(("GNI_PostRdma failed with an alignment error"));
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("GNI_PostRdma failed with gni rc: %d", status));
return opal_common_rc_ugni_to_opal(status);
return OPAL_ERR_OUT_OF_RESOURCE;
}
return OPAL_SUCCESS;
}
static inline int mca_btl_ugni_post (mca_btl_ugni_base_frag_t *frag, bool get, mca_btl_ugni_segment_t *lcl_seg,
mca_btl_ugni_segment_t *rem_seg) {
static inline int mca_btl_ugni_post_cqwrite (mca_btl_base_endpoint_t *endpoint, gni_cq_handle_t cq_handle,
gni_mem_handle_t irq_mhndl, uint64_t value,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_return_t grc;
mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata, &post_desc);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
post_desc->desc.base.type = GNI_POST_CQWRITE;
post_desc->desc.base.cqwrite_value = value; /* up to 48 bytes here, not used for now */
post_desc->desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
post_desc->desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
post_desc->desc.base.src_cq_hndl = cq_handle;
post_desc->desc.base.remote_mem_hndl = irq_mhndl;
post_desc->desc.tries = 0;
OPAL_THREAD_LOCK(&endpoint->common->dev->dev_lock);
grc = GNI_PostCqWrite(endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->common->dev->dev_lock);
if (GNI_RC_SUCCESS != grc) { /* errors for PostCqWrite treated as non-fatal */
BTL_VERBOSE(("GNI_PostCqWrite returned error - %s", gni_err_str[grc]));
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
}
return opal_common_rc_ugni_to_opal (grc);
}
static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get, size_t size,
void *local_address, uint64_t remote_address,
mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle,
int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
const gni_post_type_t fma_ops[2] = {GNI_POST_FMA_PUT, GNI_POST_FMA_GET};
const gni_post_type_t rdma_ops[2] = {GNI_POST_RDMA_PUT, GNI_POST_RDMA_GET};
if (frag->base.des_local->seg_len <= mca_btl_ugni_component.ugni_fma_limit) {
return mca_btl_ugni_post_fma (frag, fma_ops[get], lcl_seg, rem_seg);
if (size <= mca_btl_ugni_component.ugni_fma_limit) {
return mca_btl_ugni_post_fma (endpoint, fma_ops[get], size, local_address, remote_address,
local_handle, remote_handle, order, cbfunc, cbcontext, cbdata);
}
return mca_btl_ugni_post_bte (frag, rdma_ops[get], lcl_seg, rem_seg);
return mca_btl_ugni_post_bte (endpoint, rdma_ops[get], size, local_address, remote_address,
local_handle, remote_handle, order, cbfunc, cbcontext, cbdata);
}
static inline void mca_btl_ugni_repost (mca_btl_ugni_base_frag_t *frag) {
static inline int mca_btl_ugni_repost (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_post_descriptor_t *post_desc)
{
gni_return_t grc;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
if (GNI_POST_RDMA_PUT == frag->post_desc.base.type ||
GNI_POST_RDMA_GET == frag->post_desc.base.type) {
grc = GNI_PostRdma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
if (GNI_POST_RDMA_PUT == post_desc->desc.base.type ||
GNI_POST_RDMA_GET == post_desc->desc.base.type) {
grc = GNI_PostRdma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
} else {
grc = GNI_PostFma (frag->endpoint->rdma_ep_handle, &frag->post_desc.base);
grc = GNI_PostFma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
}
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
/* NTH: Should we even retry these? When this code was written there was no indication
* whether an error in post is recoverable. Clobber this code and the associated data
* structures if post errors are not recoverable. */
OPAL_THREAD_LOCK(&frag->endpoint->btl->failed_frags_lock);
opal_list_append (&frag->endpoint->btl->failed_frags, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&frag->endpoint->btl->failed_frags_lock);
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
opal_list_append (&ugni_module->pending_descriptors, (opal_list_item_t *) post_desc);
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
}
return opal_common_rc_ugni_to_opal (grc);
}
#endif /* MCA_BTL_UGNI_RDMA_H */

Просмотреть файл

@ -23,7 +23,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag)
{
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor;
size_t size = frag->segments[0].base.seg_len + frag->segments[1].base.seg_len;
size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len;
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
int flags_save = frag->base.des_flags;
int rc;
@ -41,7 +41,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
}
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, frag->segments[0].base.seg_len));
OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, size));
/* temporarily disable ownership and callback flags so we can reliably check the complete flag */
frag->base.des_flags &= ~(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
@ -90,14 +90,13 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
return rc;
}
int
mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
void *header, size_t header_size,
size_t payload_size, uint8_t order,
uint32_t flags, mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t **descriptor)
int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
void *header, size_t header_size,
size_t payload_size, uint8_t order,
uint32_t flags, mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t **descriptor)
{
size_t total_size = header_size + payload_size;
mca_btl_ugni_base_frag_t *frag = NULL;
@ -118,13 +117,14 @@ mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
frag = (mca_btl_ugni_base_frag_t *) mca_btl_ugni_prepare_src_send_buffered (btl, endpoint, convertor, order,
header_size, &packed_size, flags);
}
assert (packed_size == payload_size);
if (OPAL_UNLIKELY(NULL == frag)) {
break;
}
frag->hdr.send.lag = (tag << 24) | total_size;
memcpy (frag->segments[0].base.seg_addr.pval, header, header_size);
memcpy (frag->segments[0].seg_addr.pval, header, header_size);
rc = mca_btl_ugni_send_frag (endpoint, frag);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@ -153,7 +153,13 @@ int mca_btl_ugni_progress_send_wait_list (mca_btl_base_endpoint_t *endpoint)
if (NULL == frag) {
break;
}
rc = mca_btl_ugni_send_frag (endpoint, frag);
if (OPAL_LIKELY(!(frag->flags & MCA_BTL_UGNI_FRAG_RESPONSE))) {
rc = mca_btl_ugni_send_frag (endpoint, frag);
} else {
rc = opal_mca_btl_ugni_smsg_send (frag, &frag->hdr.rdma, sizeof (frag->hdr.rdma),
NULL, 0, MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
}
if (OPAL_UNLIKELY(OPAL_SUCCESS > rc)) {
if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
OPAL_THREAD_LOCK(&endpoint->lock);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -26,7 +26,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
mbox->attr.smsg_attr.mbox_offset = (uintptr_t) mbox->super.ptr - (uintptr_t) base_reg->base;
mbox->attr.smsg_attr.msg_buffer = base_reg->base;
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
mbox->attr.smsg_attr.mem_hndl = ugni_reg->memory_hdl;
mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle;
mbox->attr.proc_id = mca_btl_ugni_proc_name_to_id (OPAL_PROC_MY_NAME);
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
}
@ -106,8 +106,8 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
BTL_VERBOSE(("received smsg fragment. hdr = {len = %u, tag = %d}", len, tag));
reg = mca_btl_base_active_message_trigger + tag;
frag.base.des_local = &seg;
frag.base.des_local_count = 1;
frag.base.des_segments = &seg;
frag.base.des_segment_count = 1;
seg.seg_addr.pval = (void *)((uintptr_t)data_ptr + sizeof (mca_btl_ugni_send_frag_hdr_t));
seg.seg_len = len;

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -82,22 +82,12 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
return 1;
}
static void mca_btl_ugni_cqwrite_complete (struct mca_btl_ugni_base_frag_t *frag, int rc)
{
frag->flags |= MCA_BTL_UGNI_FRAG_COMPLETE;
BTL_VERBOSE(("cqwrite frag complete"));
mca_btl_ugni_frag_return (frag);
}
static inline int opal_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
void *hdr, size_t hdr_len,
void *payload, size_t payload_len,
mca_btl_ugni_smsg_tag_t tag)
{
int rc;
gni_return_t grc;
mca_btl_ugni_base_frag_t *cq_write_frag = NULL;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
grc = GNI_SmsgSendWTag (frag->endpoint->smsg_ep_handle, hdr, hdr_len,
@ -110,28 +100,9 @@ static inline int opal_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
if (mca_btl_ugni_component.progress_thread_enabled) {
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_SIGNAL) {
rc = mca_btl_ugni_frag_alloc(frag->endpoint,
&frag->endpoint->btl->rdma_frags,
&cq_write_frag);
if (rc == OPAL_SUCCESS) {
cq_write_frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
cq_write_frag->registration = NULL;
cq_write_frag->endpoint = frag->endpoint;
cq_write_frag->post_desc.base.type = GNI_POST_CQWRITE;
cq_write_frag->post_desc.base.cqwrite_value = 0xdead; /* up to 48 bytes here, not used for now */
cq_write_frag->post_desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
cq_write_frag->post_desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
cq_write_frag->post_desc.base.src_cq_hndl = frag->endpoint->btl->rdma_local_cq;
cq_write_frag->post_desc.base.remote_mem_hndl = frag->endpoint->rmt_irq_mem_hndl;
cq_write_frag->post_desc.tries = 0;
cq_write_frag->cbfunc = mca_btl_ugni_cqwrite_complete;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
grc = GNI_PostCqWrite(frag->endpoint->rdma_ep_handle, &cq_write_frag->post_desc.base);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
if (grc == GNI_RC_ERROR_RESOURCE) { /* errors for PostCqWrite treated as non-fatal */
mca_btl_ugni_frag_return (cq_write_frag);
}
}
/* errors for PostCqWrite treated as non-fatal */
(void) mca_btl_ugni_post_cqwrite (frag->endpoint, frag->endpoint->btl->rdma_local_cq,
frag->endpoint->rmt_irq_mem_hndl, 0xdead, NULL, NULL, NULL);
}
}
@ -155,12 +126,13 @@ static inline int mca_btl_ugni_send_frag (struct mca_btl_base_endpoint_t *btl_pe
mca_btl_ugni_base_frag_t *frag) {
if (OPAL_LIKELY(!(frag->flags & MCA_BTL_UGNI_FRAG_EAGER))) {
return opal_mca_btl_ugni_smsg_send (frag, &frag->hdr.send, frag->hdr_size,
frag->segments[1].base.seg_addr.pval,
frag->segments[1].base.seg_len,
frag->segments[1].seg_addr.pval,
frag->segments[1].seg_len,
MCA_BTL_UGNI_TAG_SEND);
}
frag->hdr.eager.src_seg = frag->segments[1];
frag->hdr.eager.size = frag->segments[1].seg_len;
frag->hdr.eager.address = frag->segments[1].seg_addr.lval;
frag->hdr.eager.ctx = (void *) frag;
return opal_mca_btl_ugni_smsg_send (frag, &frag->hdr.eager, frag->hdr_size,