1
1

Update the scif and openib btls for the new btl interface

Other changes:
 - Remove the registration argument from prepare_src since it is no
   longer meant for RDMA buffers.

 - Additional cleanup and bugfixes.
Этот коммит содержится в:
Nathan Hjelm 2014-10-30 16:43:41 -06:00 коммит произвёл Nathan Hjelm
родитель c61e017177
Коммит e03956e099
43 изменённых файлов: 1054 добавлений и 1093 удалений

Просмотреть файл

@ -241,23 +241,27 @@ enum {
/** Allow local write on the registered region. If a region is registered /** Allow local write on the registered region. If a region is registered
* with this flag the registration can be used as the local handle for a * with this flag the registration can be used as the local handle for a
* btl_get operation. */ * btl_get operation. */
MCA_BTL_REG_FLAG_LOCAL_WRITE = 0x1, MCA_BTL_REG_FLAG_LOCAL_WRITE = 0x00000001,
/** Allow remote read on the registered region. If a region is registered /** Allow remote read on the registered region. If a region is registered
* with this flag the registration can be used as the remote handle for a * with this flag the registration can be used as the remote handle for a
* btl_get operation. */ * btl_get operation. */
MCA_BTL_REG_FLAG_REMOTE_READ = 0x2, MCA_BTL_REG_FLAG_REMOTE_READ = 0x00000002,
/** Allow remote write on the registered region. If a region is registered /** Allow remote write on the registered region. If a region is registered
* with this flag the registration can be used as the remote handle for a * with this flag the registration can be used as the remote handle for a
* btl_put operation. */ * btl_put operation. */
MCA_BTL_REG_FLAG_REMOTE_WRITE = 0x4, MCA_BTL_REG_FLAG_REMOTE_WRITE = 0x00000004,
/** Allow remote atomic operations on the registered region. If a region is /** Allow remote atomic operations on the registered region. If a region is
* registered with this flag the registration can be used as the remote * registered with this flag the registration can be used as the remote
* handle for a btl_atomic_op or btl_atomic_fop operation. */ * handle for a btl_atomic_op or btl_atomic_fop operation. */
MCA_BTL_REG_FLAG_REMOTE_ATOMIC = 0x8, MCA_BTL_REG_FLAG_REMOTE_ATOMIC = 0x00000008,
/** Allow any btl operation on the registered region. If a region is registered /** Allow any btl operation on the registered region. If a region is registered
* with this flag the registration can be used as the local or remote handle for * with this flag the registration can be used as the local or remote handle for
* any btl operation. */ * any btl operation. */
MCA_BTL_REG_FLAG_ACCESS_ANY = 0xf, MCA_BTL_REG_FLAG_ACCESS_ANY = 0x0000000f,
#if OPAL_CUDA_GDR_SUPPORT
/** Region is in GPU memory */
MCA_BTL_REG_FLAG_CUDA_GPU_MEM = 0x00010000,
#endif
}; };
/** /**
@ -718,7 +722,6 @@ typedef int (*mca_btl_base_module_free_fn_t)(
typedef struct mca_btl_base_descriptor_t* (*mca_btl_base_module_prepare_fn_t)( typedef struct mca_btl_base_descriptor_t* (*mca_btl_base_module_prepare_fn_t)(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -853,6 +856,7 @@ typedef int (*mca_btl_base_module_sendi_fn_t)(
* (remote_address, remote_address + size) * (remote_address, remote_address + size)
* @param size (IN) Number of bytes to put * @param size (IN) Number of bytes to put
* @param flags (IN) Flags for this put operation * @param flags (IN) Flags for this put operation
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion (if queued) * @param cbfunc (IN) Function to call on completion (if queued)
* @param cbcontext (IN) Context for the callback * @param cbcontext (IN) Context for the callback
* @param cbdata (IN) Data for callback * @param cbdata (IN) Data for callback
@ -868,7 +872,7 @@ typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint, void *local_address, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle, uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/** /**
* Initiate an asynchronous get. * Initiate an asynchronous get.
@ -916,6 +920,7 @@ typedef int (*mca_btl_base_module_put_fn_t) (struct mca_btl_base_module_t *btl,
* (remote_address, remote_address + size) * (remote_address, remote_address + size)
* @param size (IN) Number of bytes to put * @param size (IN) Number of bytes to put
* @param flags (IN) Flags for this put operation * @param flags (IN) Flags for this put operation
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion (if queued) * @param cbfunc (IN) Function to call on completion (if queued)
* @param cbcontext (IN) Context for the callback * @param cbcontext (IN) Context for the callback
* @param cbdata (IN) Data for callback * @param cbdata (IN) Data for callback
@ -931,7 +936,7 @@ typedef int (*mca_btl_base_module_get_fn_t) (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint, void *local_address, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle, uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata); int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/** /**
* Diagnostic dump of btl state. * Diagnostic dump of btl state.

Просмотреть файл

@ -91,6 +91,11 @@
#define MIN(a,b) ((a)<(b)?(a):(b)) #define MIN(a,b) ((a)<(b)?(a):(b))
#endif #endif
static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags);
static int mca_btl_openib_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle);
mca_btl_openib_module_t mca_btl_openib_module = { mca_btl_openib_module_t mca_btl_openib_module = {
.super = { .super = {
.btl_component = &mca_btl_openib_component.super, .btl_component = &mca_btl_openib_component.super,
@ -101,14 +106,15 @@ mca_btl_openib_module_t mca_btl_openib_module = {
.btl_alloc = mca_btl_openib_alloc, .btl_alloc = mca_btl_openib_alloc,
.btl_free = mca_btl_openib_free, .btl_free = mca_btl_openib_free,
.btl_prepare_src = mca_btl_openib_prepare_src, .btl_prepare_src = mca_btl_openib_prepare_src,
.btl_prepare_dst = mca_btl_openib_prepare_dst,
.btl_send = mca_btl_openib_send, .btl_send = mca_btl_openib_send,
.btl_sendi = mca_btl_openib_sendi, /* send immediate */ .btl_sendi = mca_btl_openib_sendi, /* send immediate */
.btl_put = mca_btl_openib_put, .btl_put = mca_btl_openib_put,
.btl_get = mca_btl_openib_get, .btl_get = mca_btl_openib_get,
.btl_dump = mca_btl_base_dump, .btl_dump = mca_btl_base_dump,
.btl_register_error = mca_btl_openib_register_error_cb, /* error call back registration */ .btl_register_error = mca_btl_openib_register_error_cb, /* error call back registration */
.btl_ft_event = mca_btl_openib_ft_event .btl_ft_event = mca_btl_openib_ft_event,
.btl_register_mem = mca_btl_openib_register_mem,
.btl_deregister_mem = mca_btl_openib_deregister_mem,
} }
}; };
@ -1226,7 +1232,7 @@ ib_frag_alloc(mca_btl_openib_module_t *btl, size_t size, uint8_t order,
/* check if pending fragment has enough space for coalescing */ /* check if pending fragment has enough space for coalescing */
static mca_btl_openib_send_frag_t *check_coalescing(opal_list_t *frag_list, static mca_btl_openib_send_frag_t *check_coalescing(opal_list_t *frag_list,
opal_mutex_t *lock, mca_btl_base_endpoint_t *ep, size_t size) opal_mutex_t *lock, struct mca_btl_base_endpoint_t *ep, size_t size)
{ {
mca_btl_openib_send_frag_t *frag = NULL; mca_btl_openib_send_frag_t *frag = NULL;
@ -1390,12 +1396,6 @@ int mca_btl_openib_free(
to_send_frag(des)->hdr + 1; to_send_frag(des)->hdr + 1;
assert(!opal_list_get_size(&to_send_frag(des)->coalesced_frags)); assert(!opal_list_get_size(&to_send_frag(des)->coalesced_frags));
/* fall through */ /* fall through */
case MCA_BTL_OPENIB_FRAG_RECV:
case MCA_BTL_OPENIB_FRAG_RECV_USER:
case MCA_BTL_OPENIB_FRAG_SEND_USER:
to_base_frag(des)->base.des_remote = NULL;
to_base_frag(des)->base.des_remote_count = 0;
break;
default: default:
break; break;
} }
@ -1430,7 +1430,6 @@ int mca_btl_openib_free(
mca_btl_base_descriptor_t* mca_btl_openib_prepare_src( mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -1438,7 +1437,6 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
uint32_t flags) uint32_t flags)
{ {
mca_btl_openib_module_t *openib_btl; mca_btl_openib_module_t *openib_btl;
mca_btl_openib_reg_t *openib_reg;
mca_btl_openib_com_frag_t *frag = NULL; mca_btl_openib_com_frag_t *frag = NULL;
struct iovec iov; struct iovec iov;
uint32_t iov_count = 1; uint32_t iov_count = 1;
@ -1448,82 +1446,19 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
openib_btl = (mca_btl_openib_module_t*)btl; openib_btl = (mca_btl_openib_module_t*)btl;
#if OPAL_CUDA_GDR_SUPPORT
if(opal_convertor_cuda_need_buffers(convertor) == false && 0 == reserve) {
#else
if(opal_convertor_need_buffers(convertor) == false && 0 == reserve) {
#endif /* OPAL_CUDA_GDR_SUPPORT */
/* GMS bloody HACK! */
if(registration != NULL || max_data > btl->btl_max_send_size) {
frag = alloc_send_user_frag();
if(NULL == frag) {
return NULL;
}
iov.iov_len = max_data;
iov.iov_base = NULL;
opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
*size = max_data;
if(NULL == registration) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
iov.iov_base, max_data, 0, &registration);
if(OPAL_SUCCESS != rc || NULL == registration) {
MCA_BTL_IB_FRAG_RETURN(frag);
return NULL;
}
/* keep track of the registration we did */
to_com_frag(frag)->registration =
(mca_btl_openib_reg_t*)registration;
}
openib_reg = (mca_btl_openib_reg_t*)registration;
frag->sg_entry.length = max_data;
frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (uint64_t)(uintptr_t)iov.iov_base;
to_base_frag(frag)->base.order = order;
to_base_frag(frag)->base.des_flags = flags;
to_base_frag(frag)->segment.base.seg_len = max_data;
to_base_frag(frag)->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
to_base_frag(frag)->segment.key = frag->sg_entry.lkey;
assert(MCA_BTL_NO_ORDER == order); assert(MCA_BTL_NO_ORDER == order);
BTL_VERBOSE(("frag->sg_entry.lkey = %" PRIu32 " .addr = %" PRIx64, if (max_data + reserve > btl->btl_max_send_size) {
frag->sg_entry.lkey, frag->sg_entry.addr));
return &to_base_frag(frag)->base;
}
}
assert(MCA_BTL_NO_ORDER == order);
if(max_data + reserve > btl->btl_max_send_size) {
max_data = btl->btl_max_send_size - reserve; max_data = btl->btl_max_send_size - reserve;
} }
if (OPAL_UNLIKELY(0 == reserve)) { frag = (mca_btl_openib_com_frag_t *) mca_btl_openib_alloc (btl, endpoint, order,
frag = (mca_btl_openib_com_frag_t *) ib_frag_alloc(openib_btl, max_data, order, flags);
if(NULL == frag)
return NULL;
/* NTH: this frag will be ue used for either a get or put so we need to set the lval to be
consistent with the usage in get and put. the pval will be restored in mca_btl_openib_free */
ptr = to_base_frag(frag)->segment.base.seg_addr.pval;
to_base_frag(frag)->segment.base.seg_addr.lval =
(uint64_t)(uintptr_t) ptr;
} else {
frag =
(mca_btl_openib_com_frag_t *) mca_btl_openib_alloc(btl, endpoint, order,
max_data + reserve, flags); max_data + reserve, flags);
if(NULL == frag) if (NULL == frag) {
return NULL; return NULL;
}
ptr = to_base_frag(frag)->segment.base.seg_addr.pval; ptr = to_base_frag(frag)->segment.base.seg_addr.pval;
}
iov.iov_len = max_data; iov.iov_len = max_data;
iov.iov_base = (IOVBASE_TYPE *) ( (unsigned char*) ptr + reserve ); iov.iov_base = (IOVBASE_TYPE *) ( (unsigned char*) ptr + reserve );
@ -1547,103 +1482,6 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
return &to_base_frag(frag)->base; return &to_base_frag(frag)->base;
} }
/**
* Prepare the dst buffer
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
* prepare dest's behavior depends on the following:
* Has a valid memory registration been passed to prepare_src?
* if so we attempt to use the pre-registered user-buffer, if the memory registration
* is to small (only a portion of the user buffer) then we must reregister the user buffer
* Has the user requested the memory to be left pinned?
* if so we insert the memory registration into a memory tree for later lookup, we
* may also remove a previous registration if a MRU (most recently used) list of
* registrations is full, this prevents resources from being exhausted.
*/
mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags)
{
mca_btl_openib_module_t *openib_btl;
mca_btl_openib_component_t *openib_component;
mca_btl_openib_com_frag_t *frag;
mca_btl_openib_reg_t *openib_reg;
uint32_t max_msg_sz;
int rc;
void *buffer;
openib_btl = (mca_btl_openib_module_t*)btl;
openib_component = (mca_btl_openib_component_t*)btl->btl_component;
frag = alloc_recv_user_frag();
if(NULL == frag) {
return NULL;
}
/* max_msg_sz is the maximum message size of the HCA (hw limitation)
set the minimum between local max_msg_sz and the remote */
max_msg_sz = MIN(openib_btl->ib_port_attr.max_msg_sz,
endpoint->endpoint_btl->ib_port_attr.max_msg_sz);
/* check if user has explicitly limited the max message size */
if (openib_component->max_hw_msg_size > 0 &&
max_msg_sz > (size_t)openib_component->max_hw_msg_size) {
max_msg_sz = openib_component->max_hw_msg_size;
}
/* limit the message so to max_msg_sz */
if (*size > (size_t)max_msg_sz) {
*size = (size_t)max_msg_sz;
BTL_VERBOSE(("message size limited to %" PRIsize_t "\n", *size));
}
opal_convertor_get_current_pointer(convertor, &buffer);
if(NULL == registration){
/* we didn't get a memory registration passed in, so we have to
* register the region ourselves
*/
uint32_t mflags = 0;
#if OPAL_CUDA_GDR_SUPPORT
if (convertor->flags & CONVERTOR_CUDA) {
mflags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM;
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, buffer, *size, mflags,
&registration);
if(OPAL_SUCCESS != rc || NULL == registration) {
MCA_BTL_IB_FRAG_RETURN(frag);
return NULL;
}
/* keep track of the registration we did */
frag->registration = (mca_btl_openib_reg_t*)registration;
}
openib_reg = (mca_btl_openib_reg_t*)registration;
frag->sg_entry.length = *size;
frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (uint64_t)(uintptr_t)buffer;
to_base_frag(frag)->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) buffer;
to_base_frag(frag)->segment.base.seg_len = *size;
to_base_frag(frag)->segment.key = openib_reg->mr->rkey;
to_base_frag(frag)->base.order = order;
to_base_frag(frag)->base.des_flags = flags;
BTL_VERBOSE(("frag->sg_entry.lkey = %" PRIu32 " .addr = %" PRIx64 " "
"rkey = %" PRIu32, frag->sg_entry.lkey, frag->sg_entry.addr,
openib_reg->mr->rkey));
return &to_base_frag(frag)->base;
}
static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl) { static int mca_btl_openib_finalize_resources(struct mca_btl_base_module_t* btl) {
mca_btl_openib_module_t* openib_btl; mca_btl_openib_module_t* openib_btl;
mca_btl_openib_endpoint_t* endpoint; mca_btl_openib_endpoint_t* endpoint;
@ -1997,40 +1835,127 @@ int mca_btl_openib_send(
return mca_btl_openib_endpoint_send(ep, frag); return mca_btl_openib_endpoint_send(ep, frag);
} }
static mca_btl_base_registration_handle_t *mca_btl_openib_register_mem (mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags)
{
mca_btl_openib_reg_t *reg;
uint32_t mflags = 0;
int rc;
#if OPAL_CUDA_GDR_SUPPORT
if (flags & MCA_BTL_REG_FLAG_CUDA_GPU_MEM) {
mflags |= MCA_MPOOL_FLAGS_CUDA_GPU_MEM;
}
#endif /* OPAL_CUDA_GDR_SUPPORT */
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, mflags,
(mca_mpool_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == reg)) {
return NULL;
}
return &reg->btl_handle;
}
static int mca_btl_openib_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{
mca_btl_openib_reg_t *reg = (mca_btl_openib_reg_t *)((intptr_t) handle - offsetof (mca_btl_openib_reg_t, btl_handle));
btl->btl_mpool->mpool_deregister (btl->btl_mpool, (mca_mpool_base_registration_t *) reg);
return OPAL_SUCCESS;
}
/* /*
* RDMA WRITE local buffer to remote buffer address. * RDMA WRITE local buffer to remote buffer address.
*/ */
int mca_btl_openib_put( mca_btl_base_module_t* btl, int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, void *local_address,
mca_btl_base_endpoint_t* ep, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_descriptor_t* descriptor) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_openib_segment_t *src_seg = (mca_btl_openib_segment_t *) descriptor->des_segments; mca_btl_openib_put_frag_t *frag = NULL;
mca_btl_openib_segment_t *dst_seg = (mca_btl_openib_segment_t *) descriptor->des_remote; int rc, qp = order;
struct ibv_send_wr* bad_wr;
mca_btl_openib_out_frag_t* frag = to_out_frag(descriptor);
int qp = descriptor->order;
uint64_t rem_addr = dst_seg->base.seg_addr.lval;
uint32_t rkey = dst_seg->key;
assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND_USER || if (OPAL_UNLIKELY(size > btl->btl_put_limit)) {
openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND); return OPAL_ERR_BAD_PARAM;
descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
if(ep->endpoint_state != MCA_BTL_IB_CONNECTED) {
int rc;
OPAL_THREAD_LOCK(&ep->endpoint_lock);
rc = check_endpoint_state(ep, descriptor, &ep->pending_put_frags);
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
if(OPAL_ERR_RESOURCE_BUSY == rc)
return OPAL_SUCCESS;
if(OPAL_SUCCESS != rc)
return rc;
} }
if(MCA_BTL_NO_ORDER == qp) frag = to_put_frag(alloc_send_user_frag ());
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (MCA_BTL_NO_ORDER == qp) {
qp = mca_btl_openib_component.rdma_qp; qp = mca_btl_openib_component.rdma_qp;
}
/* set base descriptor flags */
to_base_frag(frag)->base.order = order;
/* free this descriptor when the operation is complete */
to_base_frag(frag)->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
/* set up scatter-gather entry */
to_com_frag(frag)->sg_entry.length = size;
to_com_frag(frag)->sg_entry.lkey = local_handle->lkey;
to_com_frag(frag)->sg_entry.addr = (uint64_t)(intptr_t) local_address;
to_com_frag(frag)->endpoint = ep;
/* set up rdma callback */
frag->cb.func = cbfunc;
frag->cb.context = cbcontext;
frag->cb.data = cbdata;
frag->cb.local_handle = local_handle;
/* post descriptor */
to_out_frag(frag)->sr_desc.opcode = IBV_WR_RDMA_WRITE;
to_out_frag(frag)->sr_desc.send_flags = ib_send_flags(size, &(ep->qps[qp]), 1);
to_out_frag(frag)->sr_desc.wr.rdma.remote_addr = remote_address;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if ((ep->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
!= (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
to_out_frag(frag)->sr_desc.wr.rdma.rkey = opal_swap_bytes4(remote_handle->rkey);
} else
#endif
{
to_out_frag(frag)->sr_desc.wr.rdma.rkey = remote_handle->rkey;
}
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
to_out_frag(frag)->sr_desc.xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) {
OPAL_THREAD_LOCK(&ep->endpoint_lock);
rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_put_frags);
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
if (OPAL_ERR_RESOURCE_BUSY == rc) {
/* descriptor was queued pending connection */
return OPAL_SUCCESS;
}
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
MCA_BTL_IB_FRAG_RETURN (frag);
return rc;
}
}
rc = mca_btl_openib_put_internal (btl, ep, qp, frag);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
MCA_BTL_IB_FRAG_RETURN (frag);
}
return rc;
}
int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep,
int qp, mca_btl_openib_put_frag_t *frag)
{
struct ibv_send_wr *bad_wr;
/* check for a send wqe */ /* check for a send wqe */
if (qp_get_wqe(ep, qp) < 0) { if (qp_get_wqe(ep, qp) < 0) {
@ -2040,35 +1965,11 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
OPAL_THREAD_UNLOCK(&ep->endpoint_lock); OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
/* post descriptor */
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if((ep->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
!= (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
rem_addr = opal_swap_bytes8(rem_addr);
rkey = opal_swap_bytes4(rkey);
}
#endif
frag->sr_desc.wr.rdma.remote_addr = rem_addr;
frag->sr_desc.wr.rdma.rkey = rkey;
to_com_frag(frag)->sg_entry.addr = src_seg->base.seg_addr.lval;
to_com_frag(frag)->sg_entry.length = src_seg->base.seg_len;
to_com_frag(frag)->endpoint = ep;
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
descriptor->order = qp;
/* Setting opcode on a frag constructor isn't enough since prepare_src
* may return send_frag instead of put_frag */
frag->sr_desc.opcode = IBV_WR_RDMA_WRITE;
frag->sr_desc.send_flags = ib_send_flags(src_seg->base.seg_len, &(ep->qps[qp]), 1);
qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag)); qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag));
qp_reset_signal_count(ep, qp); qp_reset_signal_count(ep, qp);
if(ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &to_out_frag(frag)->sr_desc, &bad_wr))
return OPAL_ERROR; return OPAL_ERROR;
return OPAL_SUCCESS; return OPAL_SUCCESS;
@ -2078,35 +1979,84 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
* RDMA READ remote buffer to local buffer address. * RDMA READ remote buffer to local buffer address.
*/ */
int mca_btl_openib_get(mca_btl_base_module_t* btl, int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep, void *local_address,
mca_btl_base_endpoint_t* ep, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_descriptor_t* descriptor) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_openib_segment_t *src_seg = (mca_btl_openib_segment_t *) descriptor->des_remote; mca_btl_openib_get_frag_t* frag = NULL;
mca_btl_openib_segment_t *dst_seg = (mca_btl_openib_segment_t *) descriptor->des_segments; int qp = order;
struct ibv_send_wr* bad_wr;
mca_btl_openib_get_frag_t* frag = to_get_frag(descriptor);
int qp = descriptor->order;
uint64_t rem_addr = src_seg->base.seg_addr.lval;
uint32_t rkey = src_seg->key;
assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_RECV_USER);
descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
if(ep->endpoint_state != MCA_BTL_IB_CONNECTED) {
int rc; int rc;
OPAL_THREAD_LOCK(&ep->endpoint_lock);
rc = check_endpoint_state(ep, descriptor, &ep->pending_get_frags); if (OPAL_UNLIKELY(size > btl->btl_get_limit)) {
OPAL_THREAD_UNLOCK(&ep->endpoint_lock); return OPAL_ERR_BAD_PARAM;
if(OPAL_ERR_RESOURCE_BUSY == rc)
return OPAL_SUCCESS;
if(OPAL_SUCCESS != rc)
return rc;
} }
if(MCA_BTL_NO_ORDER == qp) frag = to_get_frag(alloc_recv_user_frag());
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (MCA_BTL_NO_ORDER == qp) {
qp = mca_btl_openib_component.rdma_qp; qp = mca_btl_openib_component.rdma_qp;
}
/* set base descriptor flags */
to_base_frag(frag)->base.order = order;
/* free this descriptor when the operation is complete */
to_base_frag(frag)->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
/* set up scatter-gather entry */
to_com_frag(frag)->sg_entry.length = size;
to_com_frag(frag)->sg_entry.lkey = local_handle->lkey;
to_com_frag(frag)->sg_entry.addr = (uint64_t)(uintptr_t) local_address;
to_com_frag(frag)->endpoint = ep;
/* set up rdma callback */
frag->cb.func = cbfunc;
frag->cb.context = cbcontext;
frag->cb.data = cbdata;
frag->cb.local_handle = local_handle;
/* set up descriptor */
frag->sr_desc.wr.rdma.remote_addr = remote_address;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if((ep->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
!= (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
frag->sr_desc.wr.rdma.rkey = opal_swap_bytes4 (remote_handle->rkey);
} else
#endif
{
frag->sr_desc.wr.rdma.rkey = remote_handle->rkey;
}
if (ep->endpoint_state != MCA_BTL_IB_CONNECTED) {
OPAL_THREAD_LOCK(&ep->endpoint_lock);
rc = check_endpoint_state(ep, &to_base_frag(frag)->base, &ep->pending_get_frags);
OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
if (OPAL_ERR_RESOURCE_BUSY == rc) {
return OPAL_SUCCESS;
}
if (OPAL_SUCCESS != rc) {
MCA_BTL_IB_FRAG_RETURN (frag);
return rc;
}
}
rc = mca_btl_openib_get_internal (btl, ep, qp, frag);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
MCA_BTL_IB_FRAG_RETURN (frag);
}
return rc;
}
int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep,
int qp, mca_btl_openib_get_frag_t *frag)
{
struct ibv_send_wr* bad_wr;
/* check for a send wqe */ /* check for a send wqe */
if (qp_get_wqe(ep, qp) < 0) { if (qp_get_wqe(ep, qp) < 0) {
@ -2118,7 +2068,7 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
} }
/* check for a get token */ /* check for a get token */
if(OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) { if (OPAL_THREAD_ADD32(&ep->get_tokens,-1) < 0) {
qp_put_wqe(ep, qp); qp_put_wqe(ep, qp);
OPAL_THREAD_ADD32(&ep->get_tokens,1); OPAL_THREAD_ADD32(&ep->get_tokens,1);
OPAL_THREAD_LOCK(&ep->endpoint_lock); OPAL_THREAD_LOCK(&ep->endpoint_lock);
@ -2127,30 +2077,15 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if((ep->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
!= (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
rem_addr = opal_swap_bytes8(rem_addr);
rkey = opal_swap_bytes4(rkey);
}
#endif
frag->sr_desc.wr.rdma.remote_addr = rem_addr;
frag->sr_desc.wr.rdma.rkey = rkey;
to_com_frag(frag)->sg_entry.addr = dst_seg->base.seg_addr.lval;
to_com_frag(frag)->sg_entry.length = dst_seg->base.seg_len;
to_com_frag(frag)->endpoint = ep;
#if HAVE_XRC #if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num; frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif #endif
descriptor->order = qp;
qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag)); qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag));
qp_reset_signal_count(ep, qp); qp_reset_signal_count(ep, qp);
if(ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr)) if (ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr))
return OPAL_ERROR; return OPAL_ERROR;
return OPAL_SUCCESS; return OPAL_SUCCESS;

Просмотреть файл

@ -497,9 +497,15 @@ typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
extern mca_btl_openib_module_t mca_btl_openib_module; extern mca_btl_openib_module_t mca_btl_openib_module;
struct mca_btl_base_registration_handle_t {
uint32_t rkey;
uint32_t lkey;
};
struct mca_btl_openib_reg_t { struct mca_btl_openib_reg_t {
mca_mpool_base_registration_t base; mca_mpool_base_registration_t base;
struct ibv_mr *mr; struct ibv_mr *mr;
mca_btl_base_registration_handle_t btl_handle;
}; };
typedef struct mca_btl_openib_reg_t mca_btl_openib_reg_t; typedef struct mca_btl_openib_reg_t mca_btl_openib_reg_t;
@ -612,32 +618,91 @@ extern int mca_btl_openib_sendi( struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t** descriptor mca_btl_base_descriptor_t** descriptor
); );
/** /* forward declaration for internal put/get */
* PML->BTL Initiate a put of the specified size. struct mca_btl_openib_put_frag_t;
* struct mca_btl_openib_get_frag_t;
* @param btl (IN) BTL instance
* @param btl_peer (IN) BTL peer addressing
* @param descriptor (IN) Descriptor of data to be transmitted.
*/
extern int mca_btl_openib_put(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor
);
/** /**
* PML->BTL Initiate a get of the specified size. * @brief Schedule a put fragment with the HCA (internal)
* *
* @param btl (IN) BTL instance * @param btl (IN) BTL instance
* @param btl_base_peer (IN) BTL peer addressing * @param ep (IN) BTL endpoint
* @param descriptor (IN) Descriptor of data to be transmitted. * @param qp (IN) ID of queue pair to schedule the get on
* @param frag (IN) Fragment prepared by mca_btl_openib_put
*
* If the fragment can not be scheduled due to resource limitations then
* the fragment will be put on the pending put fragment list and retried
* when another get/put fragment has completed.
*/ */
extern int mca_btl_openib_get( int mca_btl_openib_put_internal (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep,
struct mca_btl_base_module_t* btl, int qp, struct mca_btl_openib_put_frag_t *frag);
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor
);
/**
* @brief Schedule an RDMA write with the HCA
*
* @param btl (IN) BTL instance
* @param ep (IN) BTL endpoint
* @param local_address (IN) Source address
* @param remote_address (IN) Destination address
* @param local_handle (IN) Registration handle for region containing the region {local_address, size}
* @param remote_handle (IN) Registration handle for region containing the region {remote_address, size}
* @param size (IN) Number of bytes to write
* @param flags (IN) Transfer flags
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion
* @param cbcontext (IN) Context for completion callback
* @param cbdata (IN) Data for completion callback
*
* @return OPAL_ERR_BAD_PARAM if a bad parameter was passed
* @return OPAL_SUCCESS if the operation was successfully scheduled
*
* This function will attempt to schedule a put operation with the HCA.
*/
int mca_btl_openib_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/**
* @brief Schedule a get fragment with the HCA (internal)
*
* @param btl (IN) BTL instance
* @param ep (IN) BTL endpoint
* @param qp (IN) ID of queue pair to schedule the get on
* @param frag (IN) Fragment prepared by mca_btl_openib_get
*
* If the fragment can not be scheduled due to resource limitations then
* the fragment will be put on the pending get fragment list and retried
* when another get/put fragment has completed.
*/
int mca_btl_openib_get_internal (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep,
int qp, struct mca_btl_openib_get_frag_t *frag);
/**
* @brief Schedule an RDMA read with the HCA
*
* @param btl (IN) BTL instance
* @param ep (IN) BTL endpoint
* @param local_address (IN) Destination address
* @param remote_address (IN) Source address
* @param local_handle (IN) Registration handle for region containing the region {local_address, size}
* @param remote_handle (IN) Registration handle for region containing the region {remote_address, size}
* @param size (IN) Number of bytes to read
* @param flags (IN) Transfer flags
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion
* @param cbcontext (IN) Context for completion callback
* @param cbdata (IN) Data for completion callback
*
* @return OPAL_ERR_BAD_PARAM if a bad parameter was passed
* @return OPAL_SUCCCESS if the operation was successfully scheduled
*
* This function will attempt to schedule a get operation with the HCA.
*/
int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/** /**
* Allocate a descriptor. * Allocate a descriptor.
@ -674,7 +739,6 @@ extern int mca_btl_openib_free(
mca_btl_base_descriptor_t* mca_btl_openib_prepare_src( mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer, struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -682,22 +746,6 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
uint32_t flags uint32_t flags
); );
/**
* Allocate a descriptor initialized for RDMA write.
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
*/
extern mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
extern void mca_btl_openib_frag_progress_pending_put_get( extern void mca_btl_openib_frag_progress_pending_put_get(
struct mca_btl_base_endpoint_t*, const int); struct mca_btl_base_endpoint_t*, const int);

Просмотреть файл

@ -605,6 +605,9 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
openib_reg->btl_handle.lkey = openib_reg->mr->lkey;
openib_reg->btl_handle.rkey = openib_reg->mr->rkey;
OPAL_OUTPUT_VERBOSE((30, mca_btl_openib_component.memory_registration_verbose, OPAL_OUTPUT_VERBOSE((30, mca_btl_openib_component.memory_registration_verbose,
"openib_reg_mr: base=%p, bound=%p, size=%d, flags=0x%x", reg->base, reg->bound, "openib_reg_mr: base=%p, bound=%p, size=%d, flags=0x%x", reg->base, reg->bound,
(int) (reg->bound - reg->base + 1), reg->flags)); (int) (reg->bound - reg->base + 1), reg->flags));
@ -804,7 +807,19 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbfunc = btl_openib_control; mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbfunc = btl_openib_control;
mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbdata = NULL; mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbdata = NULL;
openib_btl->super.btl_seg_size = sizeof (mca_btl_openib_segment_t); if (openib_btl->super.btl_get_limit > openib_btl->ib_port_attr.max_msg_sz) {
openib_btl->super.btl_get_limit = openib_btl->ib_port_attr.max_msg_sz;
}
openib_btl->super.btl_get_alignment = 0;
if (openib_btl->super.btl_put_limit > openib_btl->ib_port_attr.max_msg_sz) {
openib_btl->super.btl_put_limit = openib_btl->ib_port_attr.max_msg_sz;
}
openib_btl->super.btl_put_alignment = 0;
openib_btl->super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
/* Check bandwidth configured for this device */ /* Check bandwidth configured for this device */
sprintf(param, "bandwidth_%s", ibv_get_device_name(device->ib_dev)); sprintf(param, "bandwidth_%s", ibv_get_device_name(device->ib_dev));
@ -2881,16 +2896,15 @@ void mca_btl_openib_frag_progress_pending_put_get(mca_btl_base_endpoint_t *ep,
size_t i, len = opal_list_get_size(&ep->pending_get_frags); size_t i, len = opal_list_get_size(&ep->pending_get_frags);
int rc; int rc;
for(i = 0; i < len && ep->qps[qp].qp->sd_wqe > 0 && ep->get_tokens > 0; i++) for(i = 0; i < len && ep->qps[qp].qp->sd_wqe > 0 && ep->get_tokens > 0; i++) {
{
OPAL_THREAD_LOCK(&ep->endpoint_lock); OPAL_THREAD_LOCK(&ep->endpoint_lock);
frag = opal_list_remove_first(&(ep->pending_get_frags)); frag = opal_list_remove_first(&(ep->pending_get_frags));
OPAL_THREAD_UNLOCK(&ep->endpoint_lock); OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
if(NULL == frag) if (NULL == frag)
break; break;
rc = mca_btl_openib_get((mca_btl_base_module_t *)openib_btl, ep, rc = mca_btl_openib_get_internal ((mca_btl_base_module_t *)openib_btl, ep,
&to_base_frag(frag)->base); qp, to_get_frag(frag));
if(OPAL_ERR_OUT_OF_RESOURCE == rc) if (OPAL_ERR_OUT_OF_RESOURCE == rc)
break; break;
} }
@ -2899,11 +2913,11 @@ void mca_btl_openib_frag_progress_pending_put_get(mca_btl_base_endpoint_t *ep,
OPAL_THREAD_LOCK(&ep->endpoint_lock); OPAL_THREAD_LOCK(&ep->endpoint_lock);
frag = opal_list_remove_first(&(ep->pending_put_frags)); frag = opal_list_remove_first(&(ep->pending_put_frags));
OPAL_THREAD_UNLOCK(&ep->endpoint_lock); OPAL_THREAD_UNLOCK(&ep->endpoint_lock);
if(NULL == frag) if (NULL == frag)
break; break;
rc = mca_btl_openib_put((mca_btl_base_module_t*)openib_btl, ep, rc = mca_btl_openib_put_internal ((mca_btl_base_module_t*)openib_btl, ep,
&to_base_frag(frag)->base); qp, to_put_frag(frag));
if(OPAL_ERR_OUT_OF_RESOURCE == rc) if (OPAL_ERR_OUT_OF_RESOURCE == rc)
break; break;
} }
} }
@ -3266,11 +3280,25 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
/* Handle work completions */ /* Handle work completions */
switch(wc->opcode) { switch(wc->opcode) {
case IBV_WC_RDMA_READ: case IBV_WC_RDMA_READ:
OPAL_OUTPUT((-1, "Got WC: RDMA_READ"));
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
/* fall through */
case IBV_WC_RDMA_WRITE: case IBV_WC_RDMA_WRITE:
OPAL_OUTPUT((-1, "Got WC: RDMA_READ or RDMA_WRITE"));
if (IBV_WC_RDMA_READ == wc->opcode) {
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data,
OPAL_SUCCESS);
} else {
mca_btl_openib_put_frag_t *put_frag = to_put_frag(des);
put_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
put_frag->cb.local_handle, put_frag->cb.context, put_frag->cb.data,
OPAL_SUCCESS);
}
/* fall through */
case IBV_WC_SEND: case IBV_WC_SEND:
OPAL_OUTPUT((-1, "Got WC: RDMA_WRITE or SEND")); OPAL_OUTPUT((-1, "Got WC: RDMA_WRITE or SEND"));
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) { if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) {

Просмотреть файл

@ -349,7 +349,15 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_recv_frag_t);
#define to_recv_frag(f) ((mca_btl_openib_recv_frag_t*)(f)) #define to_recv_frag(f) ((mca_btl_openib_recv_frag_t*)(f))
typedef struct mca_btl_openib_out_frag_t mca_btl_openib_put_frag_t; typedef struct mca_btl_openib_put_frag_t {
mca_btl_openib_out_frag_t super;
struct {
mca_btl_base_rdma_completion_fn_t func;
mca_btl_base_registration_handle_t *local_handle;
void *context;
void *data;
} cb;
} mca_btl_openib_put_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_put_frag_t); OBJ_CLASS_DECLARATION(mca_btl_openib_put_frag_t);
#define to_put_frag(f) ((mca_btl_openib_put_frag_t*)(f)) #define to_put_frag(f) ((mca_btl_openib_put_frag_t*)(f))
@ -357,6 +365,12 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_put_frag_t);
typedef struct mca_btl_openib_get_frag_t { typedef struct mca_btl_openib_get_frag_t {
mca_btl_openib_in_frag_t super; mca_btl_openib_in_frag_t super;
struct ibv_send_wr sr_desc; struct ibv_send_wr sr_desc;
struct {
mca_btl_base_rdma_completion_fn_t func;
mca_btl_base_registration_handle_t *local_handle;
void *context;
void *data;
} cb;
} mca_btl_openib_get_frag_t; } mca_btl_openib_get_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_get_frag_t); OBJ_CLASS_DECLARATION(mca_btl_openib_get_frag_t);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -197,29 +197,21 @@ int mca_btl_scif_sendi (struct mca_btl_base_module_t *btl,
* Initiate a get operation. * Initiate a get operation.
* *
* location: btl_scif_get.c * location: btl_scif_get.c
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
int int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
mca_btl_scif_get (struct mca_btl_base_module_t *btl, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_descriptor_t *des); int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/** /**
* Initiate a put operation. * Initiate a put operation.
* *
* location: btl_scif_put.c * location: btl_scif_put.c
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
int int mca_btl_scif_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
mca_btl_scif_put (struct mca_btl_base_module_t *btl, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_descriptor_t *des); int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
mca_btl_base_descriptor_t * mca_btl_base_descriptor_t *
mca_btl_scif_alloc(struct mca_btl_base_module_t *btl, mca_btl_scif_alloc(struct mca_btl_base_module_t *btl,
@ -228,9 +220,25 @@ mca_btl_scif_alloc(struct mca_btl_base_module_t *btl,
int mca_btl_scif_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint); int mca_btl_scif_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
struct mca_btl_scif_reg_t;
struct mca_btl_base_registration_handle_t {
/** scif offset */
off_t scif_offset;
/** base address of this scif region */
uintptr_t scif_base;
};
struct mca_btl_scif_registration_handle_t {
mca_btl_base_registration_handle_t btl_handle;
struct mca_btl_scif_reg_t *reg;
};
typedef struct mca_btl_scif_registration_handle_t mca_btl_scif_registration_handle_t;
typedef struct mca_btl_scif_reg_t { typedef struct mca_btl_scif_reg_t {
mca_mpool_base_registration_t base; mca_mpool_base_registration_t base;
off_t *registrations; /** per-endpoint btl handles for this registration */
mca_btl_scif_registration_handle_t *handles;
} mca_btl_scif_reg_t; } mca_btl_scif_reg_t;
/* Global structures */ /* Global structures */

Просмотреть файл

@ -165,14 +165,14 @@ static int scif_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
/* register the fragment with all connected endpoints */ /* register the fragment with all connected endpoints */
for (i = 0 ; i < (int) mca_btl_scif_module.endpoint_count ; ++i) { for (i = 0 ; i < (int) mca_btl_scif_module.endpoint_count ; ++i) {
if ((off_t)-1 != scif_reg->registrations[i] && if ((off_t)-1 != scif_reg->handles[i].btl_handle.scif_offset &&
MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) { MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) {
(void) scif_unregister(mca_btl_scif_module.endpoints[i].scif_epd, (void) scif_unregister(mca_btl_scif_module.endpoints[i].scif_epd,
scif_reg->registrations[i], size); scif_reg->handles[i].btl_handle.scif_offset, size);
} }
} }
free (scif_reg->registrations); free (scif_reg->handles);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -184,17 +184,22 @@ static int scif_reg_mem (void *reg_data, void *base, size_t size,
int rc = OPAL_SUCCESS; int rc = OPAL_SUCCESS;
unsigned int i; unsigned int i;
scif_reg->registrations = calloc (mca_btl_scif_module.endpoint_count, scif_reg->handles = calloc (mca_btl_scif_module.endpoint_count, sizeof (scif_reg->handles[0]));
sizeof (off_t));
memset (scif_reg->registrations, -1, mca_btl_scif_module.endpoint_count * sizeof (off_t)); /* intialize all scif offsets to -1 and initialize the pointer back to the mpool registration */
for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
scif_reg->handles[i].btl_handle.scif_offset = -1;
scif_reg->handles[i].btl_handle.scif_base = (intptr_t) base;
scif_reg->handles[i].reg = scif_reg;
}
/* register the pointer with all connected endpoints */ /* register the pointer with all connected endpoints */
for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) { for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
if (MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) { if (MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) {
scif_reg->registrations[i] = scif_register(mca_btl_scif_module.endpoints[i].scif_epd, scif_reg->handles[i].btl_handle.scif_offset = scif_register (mca_btl_scif_module.endpoints[i].scif_epd,
base, size, 0, SCIF_PROT_READ | base, size, 0, SCIF_PROT_READ |
SCIF_PROT_WRITE, 0); SCIF_PROT_WRITE, 0);
if (SCIF_REGISTER_FAILED == scif_reg->registrations[i]) { if (SCIF_REGISTER_FAILED == scif_reg->handles[i].btl_handle.scif_offset) {
/* cleanup */ /* cleanup */
scif_dereg_mem (reg_data, reg); scif_dereg_mem (reg_data, reg);
rc = OPAL_ERR_OUT_OF_RESOURCE; rc = OPAL_ERR_OUT_OF_RESOURCE;

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -171,7 +171,7 @@ static int btl_scif_component_register(void)
mca_btl_scif_module.super.btl_flags = MCA_BTL_FLAGS_SEND | mca_btl_scif_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_scif_module.super.btl_seg_size = sizeof (mca_btl_scif_segment_t); mca_btl_scif_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
mca_btl_scif_module.super.btl_bandwidth = 50000; /* Mbs */ mca_btl_scif_module.super.btl_bandwidth = 50000; /* Mbs */
mca_btl_scif_module.super.btl_latency = 2; /* Microsecs */ mca_btl_scif_module.super.btl_latency = 2; /* Microsecs */
@ -330,10 +330,10 @@ static int mca_btl_scif_progress_recvs (mca_btl_base_endpoint_t *ep)
* limitation has not appeared to cause any performance * limitation has not appeared to cause any performance
* problems. */ * problems. */
frag.base.des_segment_count = 1; frag.base.des_segment_count = 1;
frag.segments[0].base.seg_len = hdr->size; frag.segments[0].seg_len = hdr->size;
frag.segments[0].base.seg_addr.pval = (void *) (hdr + 1); frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
frag.base.des_segments = &frag.segments[0].base; frag.base.des_segments = frag.segments;
/* call the registered callback function */ /* call the registered callback function */
reg->cbfunc(&mca_btl_scif_module.super, hdr->tag, &frag.base, reg->cbdata); reg->cbfunc(&mca_btl_scif_module.super, hdr->tag, &frag.base, reg->cbdata);

Просмотреть файл

@ -15,13 +15,13 @@
static inline void mca_btl_scif_base_frag_constructor (mca_btl_scif_base_frag_t *frag) static inline void mca_btl_scif_base_frag_constructor (mca_btl_scif_base_frag_t *frag)
{ {
memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base)); memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base));
frag->segments[0].base.seg_addr.pval = frag->base.super.ptr; frag->segments[0].seg_addr.pval = frag->base.super.ptr;
} }
static inline void mca_btl_scif_eager_frag_constructor (mca_btl_scif_base_frag_t *frag) static inline void mca_btl_scif_eager_frag_constructor (mca_btl_scif_base_frag_t *frag)
{ {
memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base)); memset ((char *) frag + sizeof (frag->base), 0, sizeof (*frag) - sizeof (frag->base));
frag->segments[0].base.seg_addr.pval = frag->base.super.ptr; frag->segments[0].seg_addr.pval = frag->base.super.ptr;
} }
OBJ_CLASS_INSTANCE(mca_btl_scif_eager_frag_t, mca_btl_base_descriptor_t, OBJ_CLASS_INSTANCE(mca_btl_scif_eager_frag_t, mca_btl_base_descriptor_t,

Просмотреть файл

@ -15,16 +15,6 @@
#include "btl_scif.h" #include "btl_scif.h"
#include "btl_scif_endpoint.h" #include "btl_scif_endpoint.h"
typedef struct mca_btl_scif_segment_t {
mca_btl_base_segment_t base;
/* scif offset */
off_t scif_offset;
/* original pointer */
uint64_t orig_ptr;
} mca_btl_scif_segment_t;
typedef struct mca_btl_scif_frag_hdr_t { typedef struct mca_btl_scif_frag_hdr_t {
#if defined(SCIF_USE_SEQ) #if defined(SCIF_USE_SEQ)
uint32_t seq; uint32_t seq;
@ -41,7 +31,7 @@ typedef void (*frag_cb_t) (struct mca_btl_scif_base_frag_t *, int);
typedef struct mca_btl_scif_base_frag_t { typedef struct mca_btl_scif_base_frag_t {
mca_btl_base_descriptor_t base; mca_btl_base_descriptor_t base;
mca_btl_scif_frag_hdr_t hdr; mca_btl_scif_frag_hdr_t hdr;
mca_btl_scif_segment_t segments[2]; mca_btl_base_segment_t segments[2];
mca_btl_base_endpoint_t *endpoint; mca_btl_base_endpoint_t *endpoint;
mca_btl_scif_reg_t *registration; mca_btl_scif_reg_t *registration;
ompi_free_list_t *my_list; ompi_free_list_t *my_list;
@ -78,9 +68,9 @@ static inline int mca_btl_scif_frag_return (mca_btl_scif_base_frag_t *frag)
frag->registration = NULL; frag->registration = NULL;
} }
frag->segments[0].base.seg_addr.pval = frag->base.super.ptr; frag->segments[0].seg_addr.pval = frag->base.super.ptr;
frag->segments[0].base.seg_len = 0; frag->segments[0].seg_len = 0;
frag->segments[1].base.seg_len = 0; frag->segments[1].seg_len = 0;
OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t *) frag); OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t *) frag);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2014 Research Organization for Information Science * Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved. * and Technology (RIST). All rights reserved.
@ -20,18 +20,13 @@
/** /**
* Initiate a get operation. * Initiate a get operation.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
int mca_btl_scif_get (struct mca_btl_base_module_t *btl, int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) { mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
mca_btl_scif_segment_t *src = (mca_btl_scif_segment_t *) des->des_remote; int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
mca_btl_scif_segment_t *dst = (mca_btl_scif_segment_t *) des->des_segments; {
size_t len = lmin (src->base.seg_len, dst->base.seg_len); int rc, mark, scif_flags = 0;
int rc, mark, flags = 0;
off_t roffset, loffset; off_t roffset, loffset;
#if defined(SCIF_TIMING) #if defined(SCIF_TIMING)
struct timespec ts; struct timespec ts;
@ -41,30 +36,27 @@ int mca_btl_scif_get (struct mca_btl_base_module_t *btl,
mca_btl_scif_component.get_count++; mca_btl_scif_component.get_count++;
#endif #endif
BTL_VERBOSE(("Using DMA Get for frag %p from offset %lu", (void *) des, BTL_VERBOSE(("Using DMA Get from remote address %" PRIx64 " to local address %p",
(unsigned long) src->scif_offset)); remote_address, local_address));
roffset = src->scif_offset + (off_t)(src->orig_ptr - src->base.seg_addr.lval); roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base);
loffset = dst->scif_offset + (off_t)(dst->orig_ptr - dst->base.seg_addr.lval); loffset = local_handle->scif_offset + (off_t)((intptr_t)local_address - local_handle->scif_base);
if (mca_btl_scif_component.rma_use_cpu) { if (mca_btl_scif_component.rma_use_cpu) {
flags = SCIF_RMA_USECPU; scif_flags = SCIF_RMA_USECPU;
} }
if (mca_btl_scif_component.rma_sync) { if (mca_btl_scif_component.rma_sync) {
flags |= SCIF_RMA_SYNC; scif_flags |= SCIF_RMA_SYNC;
} }
/* start the read */ /* start the read */
rc = scif_readfrom (endpoint->scif_epd, loffset, len, roffset, flags); rc = scif_readfrom (endpoint->scif_epd, loffset, size, roffset, scif_flags);
if (OPAL_UNLIKELY(-1 == rc)) { if (OPAL_UNLIKELY(-1 == rc)) {
return OPAL_ERROR; return OPAL_ERROR;
} }
/* always call the callback function */ if (!(scif_flags & SCIF_RMA_SYNC)) {
des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
if (!(flags & SCIF_RMA_SYNC)) {
/* according to the scif documentation is is better to use a fence rather /* according to the scif documentation is is better to use a fence rather
* than using the SCIF_RMA_SYNC flag with scif_readfrom */ * than using the SCIF_RMA_SYNC flag with scif_readfrom */
scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark); scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark);
@ -76,8 +68,8 @@ int mca_btl_scif_get (struct mca_btl_base_module_t *btl,
mca_btl_scif_component.get_time_max, ts); mca_btl_scif_component.get_time_max, ts);
#endif #endif
/* since we completed the fence the RMA operation is complete */ /* always call the callback function */
mca_btl_scif_frag_complete ((mca_btl_scif_base_frag_t *) des, OPAL_SUCCESS); cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -24,17 +24,14 @@ mca_btl_scif_free (struct mca_btl_base_module_t *btl,
static int static int
mca_btl_scif_module_finalize (struct mca_btl_base_module_t* btl); mca_btl_scif_module_finalize (struct mca_btl_base_module_t* btl);
static mca_btl_base_descriptor_t * static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl,
mca_btl_scif_prepare_dst (mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint, mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration, void *base, size_t size, uint32_t flags);
opal_convertor_t *convertor, uint8_t order, static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle);
size_t reserve, size_t *size, uint32_t flags);
static struct mca_btl_base_descriptor_t * static struct mca_btl_base_descriptor_t *
mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl, mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size, uint8_t order, size_t reserve, size_t *size,
uint32_t flags); uint32_t flags);
@ -48,11 +45,12 @@ mca_btl_scif_module_t mca_btl_scif_module = {
.btl_alloc = mca_btl_scif_alloc, .btl_alloc = mca_btl_scif_alloc,
.btl_free = mca_btl_scif_free, .btl_free = mca_btl_scif_free,
.btl_prepare_src = mca_btl_scif_prepare_src, .btl_prepare_src = mca_btl_scif_prepare_src,
.btl_prepare_dst = mca_btl_scif_prepare_dst,
.btl_send = mca_btl_scif_send, .btl_send = mca_btl_scif_send,
.btl_sendi = mca_btl_scif_sendi, .btl_sendi = mca_btl_scif_sendi,
.btl_put = mca_btl_scif_put, .btl_put = mca_btl_scif_put,
.btl_get = mca_btl_scif_get, .btl_get = mca_btl_scif_get,
.btl_register_mem = mca_btl_scif_register_mem,
.btl_deregister_mem = mca_btl_scif_deregister_mem,
} }
}; };
@ -163,10 +161,10 @@ mca_btl_scif_alloc(struct mca_btl_base_module_t *btl,
frag->base.des_flags = flags; frag->base.des_flags = flags;
frag->base.order = order; frag->base.order = order;
frag->base.des_segments = &frag->segments[0].base; frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 1; frag->base.des_segment_count = 1;
frag->segments[0].base.seg_len = size; frag->segments[0].seg_len = size;
return &frag->base; return &frag->base;
} }
@ -178,16 +176,19 @@ mca_btl_scif_free (struct mca_btl_base_module_t *btl,
return mca_btl_scif_frag_return ((mca_btl_scif_base_frag_t *) des); return mca_btl_scif_frag_return ((mca_btl_scif_base_frag_t *) des);
} }
static inline mca_btl_base_descriptor_t *mca_btl_scif_prepare_dma (struct mca_btl_base_module_t *btl, static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint, mca_btl_base_endpoint_t *endpoint,
void *data_ptr, size_t size, void *base, size_t size, uint32_t flags)
mca_mpool_base_registration_t *registration,
uint8_t order, uint32_t flags)
{ {
mca_btl_scif_base_frag_t *frag;
mca_btl_scif_reg_t *scif_reg; mca_btl_scif_reg_t *scif_reg;
int rc; int rc;
if (MCA_BTL_ENDPOINT_ANY == endpoint) {
/* it probably isn't possible to support registering memory to use with any endpoint so
* return NULL */
return NULL;
}
if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) { if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
/* the endpoint needs to be connected before the fragment can be /* the endpoint needs to be connected before the fragment can be
* registered. */ * registered. */
@ -198,67 +199,36 @@ static inline mca_btl_base_descriptor_t *mca_btl_scif_prepare_dma (struct mca_bt
} }
} }
(void) MCA_BTL_SCIF_FRAG_ALLOC_DMA(endpoint, frag); rc = btl->btl_mpool->mpool_register(btl->btl_mpool, base, size, 0,
if (OPAL_UNLIKELY(NULL == frag)) { (mca_mpool_base_registration_t **) &scif_reg);
return NULL;
}
if (NULL == registration) {
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, data_ptr, size, 0,
(mca_mpool_base_registration_t **) &registration);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_scif_frag_return (frag);
return NULL; return NULL;
} }
frag->registration = (mca_btl_scif_reg_t *) registration;
}
scif_reg = (mca_btl_scif_reg_t *) registration;
/* register the memory location with this peer if it isn't already */ /* register the memory location with this peer if it isn't already */
if ((off_t) -1 == scif_reg->registrations[endpoint->id]) { if ((off_t) -1 == scif_reg->handles[endpoint->id].btl_handle.scif_offset) {
size_t seg_size = (size_t)((uintptr_t) registration->bound - (uintptr_t) registration->base) + 1; size_t seg_size = (size_t)((uintptr_t) scif_reg->base.bound - (uintptr_t) scif_reg->base.base) + 1;
scif_reg->registrations[endpoint->id] = scif_register (endpoint->scif_epd, registration->base,
seg_size, 0, SCIF_PROT_READ | /* NTH: until we determine a way to pass permissions to the mpool just make all segments
* read/write */
scif_reg->handles[endpoint->id].btl_handle.scif_offset =
scif_register (endpoint->scif_epd, scif_reg->base.base, seg_size, 0, SCIF_PROT_READ |
SCIF_PROT_WRITE, 0); SCIF_PROT_WRITE, 0);
BTL_VERBOSE(("registered fragment for scif DMA transaction. offset = %lu", BTL_VERBOSE(("registered fragment for scif DMA transaction. offset = %lu",
(unsigned long) scif_reg->registrations[endpoint->id])); (unsigned long) scif_reg->handles[endpoint->id].btl_handle.scif_offset));
} }
if (OPAL_UNLIKELY((off_t) -1 == scif_reg->registrations[endpoint->id])) { return &scif_reg->handles[endpoint->id].btl_handle;
mca_btl_scif_frag_return (frag);
return NULL;
}
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segments[0].base.seg_len = size;
frag->segments[0].scif_offset = scif_reg->registrations[endpoint->id] +
(off_t) ((ptrdiff_t) data_ptr - (ptrdiff_t) registration->base);
/* save the original pointer so the offset can be adjusted if needed (this is
* required for osc/rdma) */
frag->segments[0].orig_ptr = (uint64_t)(uintptr_t) data_ptr;
frag->base.order = order;
frag->base.des_flags = flags;
frag->base.des_segments = &frag->segments->base;
frag->base.des_segment_count = 1;
return &frag->base;
} }
static inline mca_btl_base_descriptor_t *mca_btl_scif_prepare_dma_conv (struct mca_btl_base_module_t *btl, static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor,
uint8_t order, size_t *size,
uint32_t flags)
{ {
void *data_ptr; mca_btl_scif_registration_handle_t *scif_handle = (mca_btl_scif_registration_handle_t *) handle;
mca_btl_scif_reg_t *scif_reg = scif_handle->reg;
opal_convertor_get_current_pointer (convertor, &data_ptr); btl->btl_mpool->mpool_deregister (btl->btl_mpool, &scif_reg->base);
return mca_btl_scif_prepare_dma (btl, endpoint, data_ptr, *size, registration, order, flags); return OPAL_SUCCESS;
} }
static inline struct mca_btl_base_descriptor_t * static inline struct mca_btl_base_descriptor_t *
@ -286,9 +256,9 @@ mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl,
return NULL; return NULL;
} }
frag->segments[0].base.seg_len = reserve; frag->segments[0].seg_len = reserve;
frag->segments[1].base.seg_addr.pval = data_ptr; frag->segments[1].seg_addr.pval = data_ptr;
frag->segments[1].base.seg_len = *size; frag->segments[1].seg_len = *size;
frag->base.des_segment_count = 2; frag->base.des_segment_count = 2;
} else { } else {
/* buffered send */ /* buffered send */
@ -299,7 +269,7 @@ mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl,
if (*size) { if (*size) {
iov.iov_len = *size; iov.iov_len = *size;
iov.iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->segments[0].base.seg_addr.pval + reserve); iov.iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->segments[0].seg_addr.pval + reserve);
rc = opal_convertor_pack (convertor, &iov, &iov_count, &max_size); rc = opal_convertor_pack (convertor, &iov, &iov_count, &max_size);
if (OPAL_UNLIKELY(rc < 0)) { if (OPAL_UNLIKELY(rc < 0)) {
@ -309,11 +279,11 @@ mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl,
*size = max_size; *size = max_size;
} }
frag->segments[0].base.seg_len = reserve + *size; frag->segments[0].seg_len = reserve + *size;
frag->base.des_segment_count = 1; frag->base.des_segment_count = 1;
} }
frag->base.des_segments = &frag->segments->base; frag->base.des_segments = frag->segments;
frag->base.order = order; frag->base.order = order;
frag->base.des_flags = flags; frag->base.des_flags = flags;
@ -322,24 +292,9 @@ mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl,
static mca_btl_base_descriptor_t *mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl, static mca_btl_base_descriptor_t *mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint, mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size, uint8_t order, size_t reserve, size_t *size,
uint32_t flags) uint32_t flags)
{ {
if (OPAL_LIKELY(reserve)) { return mca_btl_scif_prepare_src_send (btl, endpoint, convertor, order, reserve, size, flags);
return mca_btl_scif_prepare_src_send (btl, endpoint, convertor,
order, reserve, size, flags);
} else {
return mca_btl_scif_prepare_dma_conv (btl, endpoint, registration, convertor, order, size, flags);
}
}
static mca_btl_base_descriptor_t *mca_btl_scif_prepare_dst (mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
opal_convertor_t *convertor, uint8_t order,
size_t reserve, size_t *size, uint32_t flags)
{
return mca_btl_scif_prepare_dma_conv (btl, endpoint, registration, convertor, order, size, flags);
} }

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -16,63 +16,57 @@
/** /**
* Initiate a put operation. * Initiate a put operation.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
int mca_btl_scif_put (struct mca_btl_base_module_t *btl, int mca_btl_scif_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) { mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
mca_btl_scif_segment_t *src = (mca_btl_scif_segment_t *) des->des_segments; int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
mca_btl_scif_segment_t *dst = (mca_btl_scif_segment_t *) des->des_remote; {
size_t len = lmin (src->base.seg_len, dst->base.seg_len); int rc, mark, scif_flags = 0;
int rc, mark, flags = 0;
off_t roffset, loffset; off_t roffset, loffset;
#if defined(SCIF_TIMING) #if defined(SCIF_TIMING)
struct timespec ts; struct timespec ts;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
mca_btl_scif_component.put_count++; mca_btl_scif_component.get_count++;
#endif #endif
BTL_VERBOSE(("Using DMA Put for frag %p", (void *) des)); BTL_VERBOSE(("Using DMA Put from local address %p to remote address %" PRIx64,
local_address, remote_address));
roffset = dst->scif_offset + (off_t)(dst->orig_ptr - dst->base.seg_addr.lval); roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base);
loffset = src->scif_offset + (off_t)(src->orig_ptr - src->base.seg_addr.lval); loffset = local_handle->scif_offset + (off_t)((intptr_t) local_address - local_handle->scif_base);
if (mca_btl_scif_component.rma_use_cpu) { if (mca_btl_scif_component.rma_use_cpu) {
flags = SCIF_RMA_USECPU; scif_flags = SCIF_RMA_USECPU;
} }
if (mca_btl_scif_component.rma_sync) { if (mca_btl_scif_component.rma_sync) {
flags |= SCIF_RMA_SYNC; scif_flags |= SCIF_RMA_SYNC;
} }
/* start the write */ /* start the write */
rc = scif_writeto (endpoint->scif_epd, loffset, len, roffset, flags); rc = scif_writeto (endpoint->scif_epd, loffset, size, roffset, scif_flags);
rc = scif_readfrom (endpoint->scif_epd, loffset, size, roffset, scif_flags);
if (OPAL_UNLIKELY(-1 == rc)) { if (OPAL_UNLIKELY(-1 == rc)) {
return OPAL_ERROR; return OPAL_ERROR;
} }
/* always call the callback function */ if (!(scif_flags & SCIF_RMA_SYNC)) {
des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
/* according to the scif documentation is is better to use a fence rather /* according to the scif documentation is is better to use a fence rather
* than using the SCIF_RMA_SYNC flag with scif_writeto */ * than using the SCIF_RMA_SYNC flag with scif_readfrom */
if (!(flags & SCIF_RMA_SYNC)) {
scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark); scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark);
scif_fence_wait (endpoint->scif_epd, mark); scif_fence_wait (endpoint->scif_epd, mark);
} }
#if defined(SCIF_TIMING) #if defined(SCIF_TIMING)
SCIF_UPDATE_TIMER(mca_btl_scif_component.put_time, SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time,
mca_btl_scif_component.put_time_max, ts); mca_btl_scif_component.get_time_max, ts);
#endif #endif
/* since we completed the fence the RMA operation is complete */ /* always call the callback function */
mca_btl_scif_frag_complete ((mca_btl_scif_base_frag_t *) des, OPAL_SUCCESS); cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -118,22 +118,22 @@ static int mca_btl_scif_send_frag (struct mca_btl_base_endpoint_t *endpoint,
unsigned char * restrict dst; unsigned char * restrict dst;
BTL_VERBOSE(("btl/scif sending descriptor %p from %d -> %d. length = %" PRIu64, (void *) frag, BTL_VERBOSE(("btl/scif sending descriptor %p from %d -> %d. length = %" PRIu64, (void *) frag,
OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid, frag->segments[0].base.seg_len)); opal_process_name_vpid(OPAL_PROC_MY_NAME), opal_process_name_vpid(endpoint->peer_proc->proc_name), frag->segments[0].seg_len));
if (OPAL_LIKELY(OPAL_SUCCESS == mca_btl_scif_send_get_buffer (endpoint, size, &dst))) { if (OPAL_LIKELY(OPAL_SUCCESS == mca_btl_scif_send_get_buffer (endpoint, size, &dst))) {
unsigned char * restrict data = (unsigned char * restrict) frag->segments[0].base.seg_addr.pval; unsigned char * restrict data = (unsigned char * restrict) frag->segments[0].seg_addr.pval;
#if defined(SCIF_TIMING) #if defined(SCIF_TIMING)
struct timespec ts; struct timespec ts;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
#endif #endif
memcpy (dst + sizeof (frag->hdr), data, frag->segments[0].base.seg_len); memcpy (dst + sizeof (frag->hdr), data, frag->segments[0].seg_len);
if (frag->segments[1].base.seg_len) { if (frag->segments[1].seg_len) {
memcpy (dst + sizeof (frag->hdr) + frag->segments[0].base.seg_len, memcpy (dst + sizeof (frag->hdr) + frag->segments[0].seg_len,
frag->segments[1].base.seg_addr.pval, frag->segments[1].seg_addr.pval,
frag->segments[1].base.seg_len); frag->segments[1].seg_len);
} }
#if defined(SCIF_USE_SEQ) #if defined(SCIF_USE_SEQ)
@ -165,7 +165,7 @@ int mca_btl_scif_send (struct mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag) mca_btl_base_tag_t tag)
{ {
mca_btl_scif_base_frag_t *frag = (mca_btl_scif_base_frag_t *) descriptor; mca_btl_scif_base_frag_t *frag = (mca_btl_scif_base_frag_t *) descriptor;
size_t size = frag->segments[0].base.seg_len + frag->segments[1].base.seg_len; size_t size = frag->segments[0].seg_len + frag->segments[1].seg_len;
int rc; int rc;
frag->hdr.tag = tag; frag->hdr.tag = tag;

Просмотреть файл

@ -38,17 +38,15 @@
#include "btl_self_frag.h" #include "btl_self_frag.h"
#include "opal/util/proc.h" #include "opal/util/proc.h"
int mca_btl_self_put (struct mca_btl_base_module_t *btl, static int mca_btl_self_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
int mca_btl_self_get (struct mca_btl_base_module_t *btl, static int mca_btl_self_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
mca_btl_base_module_t mca_btl_self = { mca_btl_base_module_t mca_btl_self = {
.btl_component = &mca_btl_self_component.super, .btl_component = &mca_btl_self_component.super,
@ -176,7 +174,6 @@ int mca_btl_self_free( struct mca_btl_base_module_t* btl,
struct mca_btl_base_descriptor_t* struct mca_btl_base_descriptor_t*
mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl, mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -268,11 +265,10 @@ int mca_btl_self_send( struct mca_btl_base_module_t* btl,
} }
int mca_btl_self_put (struct mca_btl_base_module_t *btl, static int mca_btl_self_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
memcpy ((void *)(intptr_t) remote_address, local_address, size); memcpy ((void *)(intptr_t) remote_address, local_address, size);
@ -281,11 +277,10 @@ int mca_btl_self_put (struct mca_btl_base_module_t *btl,
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
int mca_btl_self_get (struct mca_btl_base_module_t *btl, static int mca_btl_self_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
memcpy (local_address, (void *)(intptr_t) remote_address, size); memcpy (local_address, (void *)(intptr_t) remote_address, size);

Просмотреть файл

@ -165,24 +165,6 @@ int mca_btl_self_free(
struct mca_btl_base_descriptor_t* mca_btl_self_prepare_src( struct mca_btl_base_descriptor_t* mca_btl_self_prepare_src(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags
);
/**
* Prepare data for RDMA
*
* @param btl (IN) BTL module
* @param peer (IN) BTL peer addressing
*/
struct mca_btl_base_descriptor_t* mca_btl_self_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,

Просмотреть файл

@ -743,7 +743,6 @@ extern int mca_btl_sm_free(
struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src( struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -999,44 +998,77 @@ int mca_btl_sm_send( struct mca_btl_base_module_t* btl,
} }
#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA #if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA
struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_dst( mca_btl_base_registration_handle_t *mca_btl_sm_register_mem (struct mca_btl_base_module_t* btl,
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration, void *base, size_t size, uint32_t flags)
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags)
{ {
void *ptr; mca_btl_sm_registration_handle_t *handle = NULL;
mca_btl_sm_frag_t* frag;
MCA_BTL_SM_FRAG_ALLOC_USER(frag); OMPI_FREE_LIST_GET_MT(&mca_btl_sm_component.registration_handles, &handle);
if(OPAL_UNLIKELY(NULL == frag)) { if (OPAL_UNLIKELY(NULL == handle)) {
return NULL; return NULL;
} }
frag->segment.base.seg_len = *size; #if OPAL_BTL_SM_HAVE_KNEM
opal_convertor_get_current_pointer( convertor, &ptr ); if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) {
frag->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) ptr; knem_iov.base = (uintptr_t)base & (opal_getpagesize() - 1);
knem_iov.len = OPAL_ALIGN(size + ((intptr_t) base - knem_iov.base), opal_getpagesize());
knem_cr.iovec_array = (uintptr_t)&knem_iov;
knem_cr.iovec_nr = iov_count;
knem_cr.flags = 0;
knem_cr.protection = 0;
frag->base.des_segments = (mca_btl_base_segment_t*)&frag->segment; if (flags & MCA_BTL_REG_FLAG_REMOTE_READ) {
frag->base.des_segment_count = 1; knem_cr.protection |= PROT_READ;
frag->base.des_flags = flags; }
return &frag->base; if (flags & MCA_BTL_REG_FLAG_REMOTE_WRITE) {
knem_cr.protection |= PROT_WRITE;
}
if (OPAL_UNLIKELY(ioctl(sm_btl->knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) {
return NULL;
}
handle->btl_handle.data.knem.cookie = knem_cr.cookie;
handle->btl_handle.data.knem.base_addr = knem_iov.base;
} else
#endif
{
/* the pid could be included in a modex but this will work until btl/sm is
* deleted */
handle->btl_handle.data.pid = getpid ();
}
/* return the public part of the handle */
return &handle->btl_handle;
} }
void mca_btl_sm_deregister_mem (struct mca_btl_base_module_t* btl, mca_btl_base_registration_handle_t *handle)
{
mca_btl_sm_registration_handle_t *sm_handle =
(mca_btl_sm_registration_handle_t *)((intptr_t) handle - offsetof (mca_btl_sm_registration_handle_t, btl_handle));
#if OPAL_BTL_SM_HAVE_KNEM
if (OPAL_LIKELY(mca_btl_sm_component.use_knem)) {
(void) ioctl(sm_btl->knem_fd, KNEM_CMD_DESTROY_REGION, &handle->cookie);
}
#endif
OMPI_FREE_LIST_RETURN_MT(&mca_btl_sm_component.registration_handles, &sm_handle->super);
return OPAL_SUCCESS;
}
#endif /* OPAL_BTL_SM_HAVE_KNEM */
#if OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA
/** /**
* Initiate an synchronous get. * Initiate an synchronous get.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl, int mca_btl_sm_get_sync (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t* endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t* des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
int btl_ownership; int btl_ownership;
mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)des; mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)des;
@ -1050,12 +1082,12 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
/* Fill in the ioctl data fields. There's no async completion, so /* Fill in the ioctl data fields. There's no async completion, so
we don't need to worry about getting a slot, etc. */ we don't need to worry about getting a slot, etc. */
recv_iovec.base = (uintptr_t) dst->base.seg_addr.lval; recv_iovec.base = (uintptr_t) local_address;
recv_iovec.len = dst->base.seg_len; recv_iovec.len = size;
icopy.local_iovec_array = (uintptr_t)&recv_iovec; icopy.local_iovec_array = (uintptr_t)&recv_iovec;
icopy.local_iovec_nr = 1; icopy.local_iovec_nr = 1;
icopy.remote_cookie = src->key; icopy.remote_cookie = remote_handle->data.knem.cookie;
icopy.remote_offset = 0; icopy.remote_offset = remote_address - remote_handle->base_addr;
icopy.write = 0; icopy.write = 0;
/* Use the DMA flag if knem supports it *and* the segment length /* Use the DMA flag if knem supports it *and* the segment length
@ -1063,7 +1095,7 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
value is 0 (i.e., the MCA param was set to 0), the segment size value is 0 (i.e., the MCA param was set to 0), the segment size
will never be larger than it, so DMA will never be used. */ will never be larger than it, so DMA will never be used. */
icopy.flags = 0; icopy.flags = 0;
if (mca_btl_sm_component.knem_dma_min <= dst->base.seg_len) { if (mca_btl_sm_component.knem_dma_min <= size) {
icopy.flags = mca_btl_sm_component.knem_dma_flag; icopy.flags = mca_btl_sm_component.knem_dma_flag;
} }
/* synchronous flags only, no need to specify icopy.async_status_index */ /* synchronous flags only, no need to specify icopy.async_status_index */
@ -1081,23 +1113,18 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
#if OPAL_BTL_SM_HAVE_CMA #if OPAL_BTL_SM_HAVE_CMA
if (OPAL_LIKELY(mca_btl_sm_component.use_cma)) { if (OPAL_LIKELY(mca_btl_sm_component.use_cma)) {
char *remote_address, *local_address;
int remote_length, local_length;
struct iovec local, remote; struct iovec local, remote;
pid_t remote_pid; pid_t remote_pid;
int val; int val;
remote_address = (char *)(uintptr_t) src->base.seg_addr.lval;
remote_length = src->base.seg_len;
local_address = (char *)(uintptr_t) dst->base.seg_addr.lval; local_address = (char *)(uintptr_t) dst->base.seg_addr.lval;
local_length = dst->base.seg_len; local_length = dst->base.seg_len;
remote_pid = src->key; remote_pid = remote_handle->data.pid;
remote.iov_base = remote_address; remote.iov_base = remote_address;
remote.iov_len = remote_length; remote.iov_len = size;
local.iov_base = local_address; local.iov_base = local_address;
local.iov_len = local_length; local.iov_len = size;
val = process_vm_readv(remote_pid, &local, 1, &remote, 1, 0); val = process_vm_readv(remote_pid, &local, 1, &remote, 1, 0);
@ -1115,15 +1142,7 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
} }
#endif /* OPAL_BTL_SM_HAVE_CMA */ #endif /* OPAL_BTL_SM_HAVE_CMA */
btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
if (0 != (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags)) {
frag->base.des_cbfunc(&mca_btl_sm.super,
frag->endpoint, &frag->base,
OPAL_SUCCESS);
}
if (btl_ownership) {
MCA_BTL_SM_FRAG_RETURN(frag);
}
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -1135,33 +1154,44 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
/** /**
* Initiate an asynchronous get. * Initiate an asynchronous get.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
int mca_btl_sm_get_async(struct mca_btl_base_module_t* btl, int mca_btl_sm_get_async (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t* endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t* des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
int btl_ownership; int btl_ownership;
mca_btl_sm_t* sm_btl = (mca_btl_sm_t*) btl; mca_btl_sm_t* sm_btl = (mca_btl_sm_t*) btl;
mca_btl_sm_frag_t* frag = (mca_btl_sm_frag_t*)des; mca_btl_sm_frag_t* frag;
mca_btl_sm_segment_t *src = (mca_btl_sm_segment_t*)des->des_remote; mca_btl_sm_segment_t *src = (mca_btl_sm_segment_t*)des->des_remote;
mca_btl_sm_segment_t *dst = (mca_btl_sm_segment_t*)des->des_segments; mca_btl_sm_segment_t *dst = (mca_btl_sm_segment_t*)des->des_segments;
struct knem_cmd_inline_copy icopy; struct knem_cmd_inline_copy icopy;
struct knem_cmd_param_iovec recv_iovec; struct knem_cmd_param_iovec recv_iovec;
/* If we have no knem slots available, return /* If we have no knem slots available, fall back to synchronous */
TEMP_OUT_OF_RESOURCE */
if (sm_btl->knem_status_num_used >= if (sm_btl->knem_status_num_used >=
mca_btl_sm_component.knem_max_simultaneous) { mca_btl_sm_component.knem_max_simultaneous) {
return OPAL_ERR_TEMP_OUT_OF_RESOURCE; return mca_btl_sm_get_sync (btl, endpoint, local_address, remote_address, local_handle,
remote_handle, size, flags, cbfunc, cbcontext, cbdata);
} }
/* allocate a fragment to keep track of this transaction */
MCA_BTL_SM_FRAG_ALLOC_USER(frag);
if (OPAL_UNLIKELY(NULL == frag)) {
return mca_btl_sm_get_sync (btl, endpoint, local_address, remote_address, local_handle,
remote_handle, size, flags, cbfunc, cbcontext, cbdata);
}
/* fill in callback data */
frag->cb.func = cbfunc;
frag->cb.context = cbcontext;
frag->cb.data = cbdata;
frag->cb.local_address = local_address;
frag->cb.local_handle = local_handle;
/* We have a slot, so fill in the data fields. Bump the /* We have a slot, so fill in the data fields. Bump the
first_avail and num_used counters. */ first_avail and num_used counters. */
recv_iovec.base = (uintptr_t) dst->base.seg_addr.lval; recv_iovec.base = (uintptr_t) local_address;
recv_iovec.len = dst->base.seg_len; recv_iovec.len = dst->base.seg_len;
icopy.local_iovec_array = (uintptr_t)&recv_iovec; icopy.local_iovec_array = (uintptr_t)&recv_iovec;
icopy.local_iovec_nr = 1; icopy.local_iovec_nr = 1;
@ -1172,8 +1202,8 @@ int mca_btl_sm_get_async(struct mca_btl_base_module_t* btl,
sm_btl->knem_status_first_avail = 0; sm_btl->knem_status_first_avail = 0;
} }
++sm_btl->knem_status_num_used; ++sm_btl->knem_status_num_used;
icopy.remote_cookie = src->key; icopy.remote_cookie = remote_handle->data.knem.cookie;
icopy.remote_offset = 0; icopy.remote_offset = remote_address - remote_handle->data.knem.base_addr;
/* Use the DMA flag if knem supports it *and* the segment length /* Use the DMA flag if knem supports it *and* the segment length
is greater than the cutoff */ is greater than the cutoff */
@ -1186,19 +1216,11 @@ int mca_btl_sm_get_async(struct mca_btl_base_module_t* btl,
if (OPAL_LIKELY(0 == ioctl(sm_btl->knem_fd, if (OPAL_LIKELY(0 == ioctl(sm_btl->knem_fd,
KNEM_CMD_INLINE_COPY, &icopy))) { KNEM_CMD_INLINE_COPY, &icopy))) {
if (icopy.current_status != KNEM_STATUS_PENDING) { if (icopy.current_status != KNEM_STATUS_PENDING) {
MCA_BTL_SM_FRAG_RETURN(frag);
/* request completed synchronously */ /* request completed synchronously */
/* FIXME: what if icopy.current_status == KNEM_STATUS_FAILED? */ /* FIXME: what if icopy.current_status == KNEM_STATUS_FAILED? */
cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if (0 != (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags)) {
frag->base.des_cbfunc(&mca_btl_sm.super,
frag->endpoint, &frag->base,
OPAL_SUCCESS);
}
if (btl_ownership) {
MCA_BTL_SM_FRAG_RETURN(frag);
}
--sm_btl->knem_status_num_used; --sm_btl->knem_status_num_used;
++sm_btl->knem_status_first_used; ++sm_btl->knem_status_first_used;

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -11,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2013 Los Alamos National Security, LLC. * Copyright (c) 2010-2014 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -182,6 +183,8 @@ struct mca_btl_sm_component_t {
#if OPAL_BTL_SM_HAVE_KNEM #if OPAL_BTL_SM_HAVE_KNEM
/* Knem capabilities info */ /* Knem capabilities info */
struct knem_cmd_info knem_info; struct knem_cmd_info knem_info;
/** registration handles to hold knem cookies */
ompi_free_list_t registration_handles;
#endif /* OPAL_BTL_SM_HAVE_KNEM */ #endif /* OPAL_BTL_SM_HAVE_KNEM */
/** MCA: should we be using knem or not? neg=try but continue if /** MCA: should we be using knem or not? neg=try but continue if
@ -461,7 +464,6 @@ extern int mca_btl_sm_free(
struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src( struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -504,30 +506,20 @@ extern int mca_btl_sm_send(
/* /*
* Synchronous knem/cma get * Synchronous knem/cma get
*/ */
extern int mca_btl_sm_get_sync( int mca_btl_sm_get_sync (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_module_t* btl, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_endpoint_t* endpoint, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_descriptor_t* des ); int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
extern struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
#endif /* OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA */ #endif /* OPAL_BTL_SM_HAVE_KNEM || OPAL_BTL_SM_HAVE_CMA */
#if OPAL_BTL_SM_HAVE_KNEM #if OPAL_BTL_SM_HAVE_KNEM
/* /*
* Asynchronous knem get * Asynchronous knem get
*/ */
extern int mca_btl_sm_get_async( int mca_btl_sm_get_async (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_module_t* btl, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_endpoint_t* endpoint, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_descriptor_t* des ); int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif /* OPAL_BTL_SM_HAVE_KNEM */ #endif /* OPAL_BTL_SM_HAVE_KNEM */
@ -558,6 +550,32 @@ void mca_btl_sm_component_event_thread(opal_object_t*);
#define MCA_BTL_SM_SIGNAL_PEER(peer) #define MCA_BTL_SM_SIGNAL_PEER(peer)
#endif #endif
#if OPAL_BTL_SM_HAVE_KNEM | OPAL_BTL_SM_HAVE_CMA
struct mca_btl_base_registration_handle_t {
union {
struct {
uint64_t cookie;
intptr_t base_addr;
} knem;
pid_t pid;
} data;
};
struct mca_btl_sm_registration_handle_t {
ompi_free_list_item_t super;
mca_btl_base_registration_handle_t btl_handle;
};
typedef struct mca_btl_sm_registration_handle_t mca_btl_sm_registration_handle_t;
OBJ_CLASS_DECLARATION(mca_btl_sm_registration_handle_t);
mca_btl_base_registration_handle_t *mca_btl_sm_register_mem (struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
void *base, size_t size, uint32_t flags);
void mca_btl_sm_deregister_mem (struct mca_btl_base_module_t* btl, mca_btl_base_registration_handle_t *handle);
#endif
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -254,6 +254,10 @@ static int sm_register(void)
mca_btl_sm.super.btl_bandwidth = 9000; /* Mbs */ mca_btl_sm.super.btl_bandwidth = 9000; /* Mbs */
mca_btl_sm.super.btl_latency = 1; /* Microsecs */ mca_btl_sm.super.btl_latency = 1; /* Microsecs */
#if OPAL_BTL_SM_HAVE_KNEM
mca_btl_sm.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
#endif
/* Call the BTL based to register its MCA params */ /* Call the BTL based to register its MCA params */
mca_btl_base_param_register(&mca_btl_sm_component.super.btl_version, mca_btl_base_param_register(&mca_btl_sm_component.super.btl_version,
&mca_btl_sm.super); &mca_btl_sm.super);
@ -295,6 +299,8 @@ static int mca_btl_sm_component_open(void)
mca_btl_sm_component.sm_seg = NULL; mca_btl_sm_component.sm_seg = NULL;
#if OPAL_BTL_SM_HAVE_KNEM #if OPAL_BTL_SM_HAVE_KNEM
OBJ_CONSTRICT(&mca_btl_sm_component.registration_handles, ompi_free_list_t);
mca_btl_sm.knem_fd = -1; mca_btl_sm.knem_fd = -1;
mca_btl_sm.knem_status_array = NULL; mca_btl_sm.knem_status_array = NULL;
mca_btl_sm.knem_frag_array = NULL; mca_btl_sm.knem_frag_array = NULL;
@ -329,6 +335,8 @@ static int mca_btl_sm_component_close(void)
close(mca_btl_sm.knem_fd); close(mca_btl_sm.knem_fd);
mca_btl_sm.knem_fd = -1; mca_btl_sm.knem_fd = -1;
} }
OBJ_DESTRUCT(&mca_btl_sm_component.registration_handles);
#endif /* OPAL_BTL_SM_HAVE_KNEM */ #endif /* OPAL_BTL_SM_HAVE_KNEM */
OBJ_DESTRUCT(&mca_btl_sm_component.sm_lock); OBJ_DESTRUCT(&mca_btl_sm_component.sm_lock);
@ -903,6 +911,9 @@ mca_btl_sm_component_init(int *num_btls,
} else { } else {
mca_btl_sm.super.btl_get = mca_btl_sm_get_sync; mca_btl_sm.super.btl_get = mca_btl_sm_get_sync;
} }
mca_btl_sm.super.btl_register_mem = mca_btl_sm_register_mem;
mca_btl_sm.super.btl_deregister_mem = mca_btl_sm_deregister_mem;
} }
#else #else
/* If the user explicitly asked for knem and we can't provide it, /* If the user explicitly asked for knem and we can't provide it,
@ -917,6 +928,8 @@ mca_btl_sm_component_init(int *num_btls,
/* Will only ever have either cma or knem enabled at runtime /* Will only ever have either cma or knem enabled at runtime
so no problems with accidentally overwriting this set earlier */ so no problems with accidentally overwriting this set earlier */
mca_btl_sm.super.btl_get = mca_btl_sm_get_sync; mca_btl_sm.super.btl_get = mca_btl_sm_get_sync;
mca_btl_sm.super.btl_register_mem = mca_btl_sm_register_mem;
mca_btl_sm.super.btl_deregister_mem = mca_btl_sm_deregister_mem;
} }
#else #else
/* If the user explicitly asked for CMA and we can't provide itm /* If the user explicitly asked for CMA and we can't provide itm
@ -1175,22 +1188,14 @@ int mca_btl_sm_component_progress(void)
mca_btl_sm.knem_status_array[mca_btl_sm.knem_status_first_used]) { mca_btl_sm.knem_status_array[mca_btl_sm.knem_status_first_used]) {
if (KNEM_STATUS_SUCCESS == if (KNEM_STATUS_SUCCESS ==
mca_btl_sm.knem_status_array[mca_btl_sm.knem_status_first_used]) { mca_btl_sm.knem_status_array[mca_btl_sm.knem_status_first_used]) {
int btl_ownership;
/* Handle the completed fragment */ /* Handle the completed fragment */
frag = frag =
mca_btl_sm.knem_frag_array[mca_btl_sm.knem_status_first_used]; mca_btl_sm.knem_frag_array[mca_btl_sm.knem_status_first_used];
btl_ownership = (frag->base.des_flags & frag.cb.func (&mca_btl_sm.super, frag->endpoint,
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); frag->cb.local_address, frag->cb.local_handle,
if (0 != (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->cb.context, frag->cb.data, OPAL_SUCCESS);
frag->base.des_flags)) {
frag->base.des_cbfunc(&mca_btl_sm.super,
frag->endpoint, &frag->base,
OPAL_SUCCESS);
}
if (btl_ownership) {
MCA_BTL_SM_FRAG_RETURN(frag); MCA_BTL_SM_FRAG_RETURN(frag);
}
/* Bump counters, loop around the circular buffer if /* Bump counters, loop around the circular buffer if
necessary */ necessary */

Просмотреть файл

@ -10,6 +10,8 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
@ -11,6 +12,8 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -64,6 +67,16 @@ struct mca_btl_sm_frag_t {
/* pointer written to the FIFO, this is the base of the shared memory region */ /* pointer written to the FIFO, this is the base of the shared memory region */
mca_btl_sm_hdr_t *hdr; mca_btl_sm_hdr_t *hdr;
ompi_free_list_t* my_list; ompi_free_list_t* my_list;
#if OPAL_BTL_SM_HAVE_KNEM
/* rdma callback data. required for async get */
struct {
mca_btl_base_rdma_completion_fn_t func;
void *local_address;
struct mca_btl_base_registration_handle_t *local_handle;
void *context;
void *data;
} cb;
#endif
}; };
typedef struct mca_btl_sm_frag_t mca_btl_sm_frag_t; typedef struct mca_btl_sm_frag_t mca_btl_sm_frag_t;
typedef struct mca_btl_sm_frag_t mca_btl_sm_frag1_t; typedef struct mca_btl_sm_frag_t mca_btl_sm_frag1_t;

Просмотреть файл

@ -42,7 +42,6 @@ mca_btl_tcp_module_t mca_btl_tcp_module = {
.btl_alloc = mca_btl_tcp_alloc, .btl_alloc = mca_btl_tcp_alloc,
.btl_free = mca_btl_tcp_free, .btl_free = mca_btl_tcp_free,
.btl_prepare_src = mca_btl_tcp_prepare_src, .btl_prepare_src = mca_btl_tcp_prepare_src,
.btl_prepare_dst = mca_btl_tcp_prepare_dst,
.btl_send = mca_btl_tcp_send, .btl_send = mca_btl_tcp_send,
.btl_put = mca_btl_tcp_put, .btl_put = mca_btl_tcp_put,
.btl_dump = mca_btl_base_dump, .btl_dump = mca_btl_base_dump,
@ -202,7 +201,6 @@ int mca_btl_tcp_free(
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src( mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -272,62 +270,12 @@ mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
} }
frag->base.des_segments = frag->segments; frag->base.des_segments = frag->segments;
frag->base.des_remote = NULL;
frag->base.des_remote_count = 0;
frag->base.des_flags = flags; frag->base.des_flags = flags;
frag->base.order = MCA_BTL_NO_ORDER; frag->base.order = MCA_BTL_NO_ORDER;
*size = max_data; *size = max_data;
return &frag->base; return &frag->base;
} }
/**
* Prepare a descriptor for send/rdma using the supplied
* convertor. If the convertor references data that is contigous,
* the descriptor may simply point to the user buffer. Otherwise,
* this routine is responsible for allocating buffer space and
* packing if required.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL peer addressing
* @param convertor (IN) Data type convertor
* @param reserve (IN) Additional bytes requested by upper layer to precede user data
* @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
*/
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags)
{
mca_btl_tcp_frag_t* frag;
if( OPAL_UNLIKELY((*size) > UINT32_MAX) ) { /* limit the size to what we support */
*size = (size_t)UINT32_MAX;
}
MCA_BTL_TCP_FRAG_ALLOC_USER(frag);
if( OPAL_UNLIKELY(NULL == frag) ) {
return NULL;
}
frag->segments->seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segments->seg_addr.pval) );
frag->base.des_remote = NULL;
frag->base.des_remote_count = 0;
frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 1;
frag->base.des_flags = flags;
frag->base.order = MCA_BTL_NO_ORDER;
return &frag->base;
}
/** /**
* Initiate an asynchronous send. * Initiate an asynchronous send.
* *
@ -368,23 +316,55 @@ int mca_btl_tcp_send( struct mca_btl_base_module_t* btl,
return mca_btl_tcp_endpoint_send(endpoint,frag); return mca_btl_tcp_endpoint_send(endpoint,frag);
} }
static void fake_rdma_complete (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_base_descriptor_t *desc, int rc)
{
mca_btl_tcp_frag_t *frag = (mca_btl_tcp_frag_t *) desc;
frag->cb.func (btl, endpoint, frag->segments[0].seg_addr.pval, NULL, frag->cb.context, frag->cb.data,
rc);
}
/** /**
* Initiate an asynchronous put. * Initiate an asynchronous put.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
int mca_btl_tcp_put( mca_btl_base_module_t* btl, int mca_btl_tcp_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
mca_btl_base_endpoint_t* endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_descriptor_t* descriptor ) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl;
mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)descriptor; mca_btl_tcp_frag_t *frag = NULL;
int i; int i;
MCA_BTL_TCP_FRAG_ALLOC_USER(frag);
if( OPAL_UNLIKELY(NULL == frag) ) {
return OPAL_ERR_OUT_OF_RESOURCE;;
}
frag->endpoint = endpoint;
frag->segments->seg_len = size;
frag->segments->seg_addr.pval = local_address;
frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 1;
frag->base.order = MCA_BTL_NO_ORDER;
frag->segments[0].seg_addr.pval = local_address;
frag->segments[0].seg_len = size;
frag->segments[1].seg_addr.lval = remote_address;
frag->segments[1].seg_len = size;
frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.des_cbfunc = fake_rdma_complete;
frag->cb.func = cbfunc;
frag->cb.data = cbdata;
frag->cb.context = cbcontext;
frag->btl = tcp_btl; frag->btl = tcp_btl;
frag->endpoint = endpoint; frag->endpoint = endpoint;
frag->rc = 0; frag->rc = 0;
@ -394,8 +374,8 @@ int mca_btl_tcp_put( mca_btl_base_module_t* btl,
frag->iov_ptr = frag->iov; frag->iov_ptr = frag->iov;
frag->iov[0].iov_base = (IOVBASE_TYPE*)&frag->hdr; frag->iov[0].iov_base = (IOVBASE_TYPE*)&frag->hdr;
frag->iov[0].iov_len = sizeof(frag->hdr); frag->iov[0].iov_len = sizeof(frag->hdr);
frag->iov[1].iov_base = (IOVBASE_TYPE*)frag->base.des_remote; frag->iov[1].iov_base = (IOVBASE_TYPE*) (frag->segments + 1);
frag->iov[1].iov_len = frag->base.des_remote_count * sizeof(mca_btl_base_segment_t); frag->iov[1].iov_len = sizeof(mca_btl_base_segment_t);
for( i = 0; i < (int)frag->base.des_segment_count; i++ ) { for( i = 0; i < (int)frag->base.des_segment_count; i++ ) {
frag->hdr.size += frag->segments[i].seg_len; frag->hdr.size += frag->segments[i].seg_len;
frag->iov[i+2].iov_len = frag->segments[i].seg_len; frag->iov[i+2].iov_len = frag->segments[i].seg_len;
@ -404,7 +384,7 @@ int mca_btl_tcp_put( mca_btl_base_module_t* btl,
} }
frag->hdr.base.tag = MCA_BTL_TAG_BTL; frag->hdr.base.tag = MCA_BTL_TAG_BTL;
frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_PUT; frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_PUT;
frag->hdr.count = frag->base.des_remote_count; frag->hdr.count = 1;
if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr); if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr);
return ((i = mca_btl_tcp_endpoint_send(endpoint,frag)) >= 0 ? OPAL_SUCCESS : i); return ((i = mca_btl_tcp_endpoint_send(endpoint,frag)) >= 0 ? OPAL_SUCCESS : i);
} }
@ -412,22 +392,46 @@ int mca_btl_tcp_put( mca_btl_base_module_t* btl,
/** /**
* Initiate an asynchronous get. * Initiate an asynchronous get.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*
*/ */
int mca_btl_tcp_get( int mca_btl_tcp_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
mca_btl_base_module_t* btl, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_endpoint_t* endpoint, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
mca_btl_base_descriptor_t* descriptor) int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl; mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*) btl;
mca_btl_tcp_frag_t* frag = (mca_btl_tcp_frag_t*)descriptor; mca_btl_tcp_frag_t* frag = NULL;
int rc; int rc;
MCA_BTL_TCP_FRAG_ALLOC_USER(frag);
if( OPAL_UNLIKELY(NULL == frag) ) {
return OPAL_ERR_OUT_OF_RESOURCE;;
}
frag->endpoint = endpoint;
frag->segments->seg_len = size;
frag->segments->seg_addr.pval = local_address;
frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 1;
frag->base.order = MCA_BTL_NO_ORDER;
frag->segments[0].seg_addr.pval = local_address;
frag->segments[0].seg_len = size;
frag->segments[1].seg_addr.lval = remote_address;
frag->segments[1].seg_len = size;
/* call the rdma callback through the descriptor callback. this is
* tcp so the extra latency is not an issue */
frag->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
frag->base.des_cbfunc = fake_rdma_complete;
frag->cb.func = cbfunc;
frag->cb.data = cbdata;
frag->cb.context = cbcontext;
frag->btl = tcp_btl; frag->btl = tcp_btl;
frag->endpoint = endpoint; frag->endpoint = endpoint;
frag->rc = 0; frag->rc = 0;
@ -437,11 +441,11 @@ int mca_btl_tcp_get(
frag->iov_ptr = frag->iov; frag->iov_ptr = frag->iov;
frag->iov[0].iov_base = (IOVBASE_TYPE*)&frag->hdr; frag->iov[0].iov_base = (IOVBASE_TYPE*)&frag->hdr;
frag->iov[0].iov_len = sizeof(frag->hdr); frag->iov[0].iov_len = sizeof(frag->hdr);
frag->iov[1].iov_base = (IOVBASE_TYPE*)frag->base.des_remote; frag->iov[1].iov_base = (IOVBASE_TYPE*) &frag->segments[1];
frag->iov[1].iov_len = frag->base.des_remote_count * sizeof(mca_btl_base_segment_t); frag->iov[1].iov_len = sizeof(mca_btl_base_segment_t);
frag->hdr.base.tag = MCA_BTL_TAG_BTL; frag->hdr.base.tag = MCA_BTL_TAG_BTL;
frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_GET; frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_GET;
frag->hdr.count = frag->base.des_remote_count; frag->hdr.count = 1;
if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr); if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr);
return ((rc = mca_btl_tcp_endpoint_send(endpoint,frag)) >= 0 ? OPAL_SUCCESS : rc); return ((rc = mca_btl_tcp_endpoint_send(endpoint,frag)) >= 0 ? OPAL_SUCCESS : rc);
} }

Просмотреть файл

@ -217,32 +217,22 @@ extern int mca_btl_tcp_send(
/** /**
* Initiate an asynchronous put. * Initiate an asynchronous put.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
extern int mca_btl_tcp_put( int mca_btl_tcp_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_module_t* btl, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_endpoint_t* btl_peer, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_descriptor_t* decriptor int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
);
/** /**
* Initiate an asynchronous get. * Initiate an asynchronous get.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/ */
extern int mca_btl_tcp_get( int mca_btl_tcp_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_module_t* btl, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_endpoint_t* btl_peer, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_descriptor_t* decriptor int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
);
/** /**
* Allocate a descriptor with a segment of the requested size. * Allocate a descriptor with a segment of the requested size.
@ -290,7 +280,6 @@ extern int mca_btl_tcp_free(
mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src( mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer, struct mca_btl_base_endpoint_t* peer,
struct mca_mpool_base_registration_t*,
struct opal_convertor_t* convertor, struct opal_convertor_t* convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -298,16 +287,6 @@ mca_btl_base_descriptor_t* mca_btl_tcp_prepare_src(
uint32_t flags uint32_t flags
); );
extern mca_btl_base_descriptor_t* mca_btl_tcp_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* peer,
struct mca_mpool_base_registration_t*,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
/** /**
* Fault Tolerance Event Notification Function * Fault Tolerance Event Notification Function

Просмотреть файл

@ -287,7 +287,7 @@ static int mca_btl_tcp_component_register(void)
MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_NEED_CSUM |
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_ACK |
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA; MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
mca_btl_tcp_module.super.btl_seg_size = sizeof (mca_btl_base_segment_t);
mca_btl_tcp_module.super.btl_bandwidth = 100; mca_btl_tcp_module.super.btl_bandwidth = 100;
mca_btl_tcp_module.super.btl_latency = 100; mca_btl_tcp_module.super.btl_latency = 100;

Просмотреть файл

@ -58,6 +58,12 @@ struct mca_btl_tcp_frag_t {
size_t size; size_t size;
int rc; int rc;
ompi_free_list_t* my_list; ompi_free_list_t* my_list;
/* fake rdma completion */
struct {
mca_btl_base_rdma_completion_fn_t func;
void *data;
void *context;
} cb;
}; };
typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_t; typedef struct mca_btl_tcp_frag_t mca_btl_tcp_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_t); OBJ_CLASS_DECLARATION(mca_btl_tcp_frag_t);
@ -116,8 +122,6 @@ do { \
frag->iov_cnt = 1; \ frag->iov_cnt = 1; \
frag->iov_idx = 0; \ frag->iov_idx = 0; \
frag->iov_ptr = frag->iov; \ frag->iov_ptr = frag->iov; \
frag->base.des_remote = NULL; \
frag->base.des_remote_count = 0; \
frag->base.des_segments = frag->segments; \ frag->base.des_segments = frag->segments; \
frag->base.des_segment_count = 1; \ frag->base.des_segment_count = 1; \
} while(0) } while(0)

Просмотреть файл

@ -264,39 +264,15 @@ mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
uint32_t flags, mca_btl_base_tag_t tag, uint32_t flags, mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t **descriptor); mca_btl_base_descriptor_t **descriptor);
/** int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
* Initiate a get operation. uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
* mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
* location: btl_ugni_get.c int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
int mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
void *local_address, uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size, int flags, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata);
/** int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
* Initiate a put operation. uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
* mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
* location: btl_ugni_put.c int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
int mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
void *local_address, uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size, int flags, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata);
int mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint); int mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);

Просмотреть файл

@ -13,13 +13,10 @@
#include "btl_ugni_rdma.h" #include "btl_ugni_rdma.h"
#include "btl_ugni_smsg.h" #include "btl_ugni_smsg.h"
int mca_btl_ugni_get (struct mca_btl_base_module_t *btl, int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_registration_handle_t *local_handle, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size, int flags, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{ {
bool check; bool check;
@ -40,7 +37,7 @@ int mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
(void) mca_btl_ugni_check_endpoint_state(endpoint); (void) mca_btl_ugni_check_endpoint_state(endpoint);
return mca_btl_ugni_post (endpoint, true, size, local_address, remote_address, local_handle, return mca_btl_ugni_post (endpoint, true, size, local_address, remote_address, local_handle,
remote_handle, cbfunc, cbcontext, cbdata); remote_handle, order, cbfunc, cbcontext, cbdata);
} }
/* eager get */ /* eager get */
@ -171,7 +168,7 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
/* start the get */ /* start the get */
rc = mca_btl_ugni_post (endpoint, true, size, frag->base.super.ptr, hdr.eager.address, rc = mca_btl_ugni_post (endpoint, true, size, frag->base.super.ptr, hdr.eager.address,
&frag->memory_handle, &hdr.eager.memory_handle, &frag->memory_handle, &hdr.eager.memory_handle,
mca_btl_ugni_callback_eager_get, frag, NULL); MCA_BTL_NO_ORDER, mca_btl_ugni_callback_eager_get, frag, NULL);
if (OPAL_UNLIKELY(OPAL_SUCCESS == rc)) { if (OPAL_UNLIKELY(OPAL_SUCCESS == rc)) {
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -30,7 +30,6 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t* btl);
static struct mca_btl_base_descriptor_t * static struct mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl, mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size, uint8_t order, size_t reserve, size_t *size,
uint32_t flags); uint32_t flags);
@ -271,7 +270,6 @@ mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
static struct mca_btl_base_descriptor_t * static struct mca_btl_base_descriptor_t *
mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl, mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint, mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size, uint8_t order, size_t reserve, size_t *size,
uint32_t flags) uint32_t flags)

Просмотреть файл

@ -41,7 +41,7 @@ mca_btl_ugni_prepare_src_send_nodata (struct mca_btl_base_module_t *btl,
frag->segments[1].seg_addr.pval = NULL; frag->segments[1].seg_addr.pval = NULL;
frag->segments[1].seg_len = 0; frag->segments[1].seg_len = 0;
frag->base.des_segments = &frag->segments; frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 1; frag->base.des_segment_count = 1;
frag->base.order = order; frag->base.order = order;
frag->base.des_flags = flags; frag->base.des_flags = flags;
@ -98,7 +98,7 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
frag->segments[1].seg_addr.pval = data_ptr; frag->segments[1].seg_addr.pval = data_ptr;
frag->segments[1].seg_len = *size; frag->segments[1].seg_len = *size;
frag->base.des_segments = &frag->segments; frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 2; frag->base.des_segment_count = 2;
frag->base.order = order; frag->base.order = order;
frag->base.des_flags = flags; frag->base.des_flags = flags;
@ -159,7 +159,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
frag->segments[1].seg_addr.pval = frag->base.super.ptr; frag->segments[1].seg_addr.pval = frag->base.super.ptr;
frag->segments[1].seg_len = *size; frag->segments[1].seg_len = *size;
frag->base.des_segments = &frag->segments; frag->base.des_segments = frag->segments;
frag->base.des_segment_count = 2; frag->base.des_segment_count = 2;
frag->base.order = order; frag->base.order = order;
frag->base.des_flags = flags; frag->base.des_flags = flags;

Просмотреть файл

@ -14,13 +14,10 @@
#include "btl_ugni_rdma.h" #include "btl_ugni_rdma.h"
int mca_btl_ugni_put (struct mca_btl_base_module_t *btl, int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_registration_handle_t *local_handle, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
struct mca_btl_base_registration_handle_t *remote_handle,
size_t size, int flags, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{ {
BTL_VERBOSE(("Using RDMA/FMA Put from local address %p to remote address %" PRIx64, BTL_VERBOSE(("Using RDMA/FMA Put from local address %p to remote address %" PRIx64,
local_address, remote_address)); local_address, remote_address));
@ -29,5 +26,5 @@ int mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
(void) mca_btl_ugni_check_endpoint_state(endpoint); (void) mca_btl_ugni_check_endpoint_state(endpoint);
return mca_btl_ugni_post (endpoint, false, size, local_address, remote_address, local_handle, return mca_btl_ugni_post (endpoint, false, size, local_address, remote_address, local_handle,
remote_handle, cbfunc, cbcontext, cbdata); remote_handle, order, cbfunc, cbcontext, cbdata);
} }

Просмотреть файл

@ -21,7 +21,7 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
mca_btl_ugni_base_frag_t *frag); mca_btl_ugni_base_frag_t *frag);
static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc, static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
gni_post_type_t op_type, int order, gni_post_type_t op_type,
uint64_t lcl_addr, uint64_t lcl_addr,
gni_mem_handle_t lcl_mdh, gni_mem_handle_t lcl_mdh,
uint64_t rem_addr, uint64_t rem_addr,
@ -30,7 +30,11 @@ static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
gni_cq_handle_t cq_hndl) { gni_cq_handle_t cq_hndl) {
post_desc->base.type = op_type; post_desc->base.type = op_type;
post_desc->base.cq_mode = GNI_CQMODE_GLOBAL_EVENT; post_desc->base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
if (MCA_BTL_NO_ORDER == order) {
post_desc->base.dlvr_mode = GNI_DLVMODE_PERFORMANCE; post_desc->base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
} else {
post_desc->base.dlvr_mode = GNI_DLVMODE_NO_ADAPT;
}
post_desc->base.local_addr = (uint64_t) lcl_addr; post_desc->base.local_addr = (uint64_t) lcl_addr;
post_desc->base.local_mem_hndl = lcl_mdh; post_desc->base.local_mem_hndl = lcl_mdh;
post_desc->base.remote_addr = (uint64_t) rem_addr; post_desc->base.remote_addr = (uint64_t) rem_addr;
@ -45,7 +49,7 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin
size_t size, void *local_address, uint64_t remote_address, size_t size, void *local_address, uint64_t remote_address,
mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_registration_handle_t *remote_handle,
mca_btl_base_rdma_completion_fn_t cbfunc, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata) void *cbcontext, void *cbdata)
{ {
mca_btl_ugni_post_descriptor_t *post_desc; mca_btl_ugni_post_descriptor_t *post_desc;
@ -58,7 +62,7 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin
/* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint /* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint
* is used. */ * is used. */
init_gni_post_desc (&post_desc->desc, op_type, (intptr_t) local_address, local_handle->gni_handle, init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, 0); remote_address, remote_handle->gni_handle, size, 0);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock); OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
@ -83,7 +87,7 @@ static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_
size_t size, void *local_address, uint64_t remote_address, size_t size, void *local_address, uint64_t remote_address,
mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_registration_handle_t *remote_handle,
mca_btl_base_rdma_completion_fn_t cbfunc, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata) void *cbcontext, void *cbdata)
{ {
mca_btl_ugni_post_descriptor_t *post_desc; mca_btl_ugni_post_descriptor_t *post_desc;
@ -95,7 +99,7 @@ static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_
} }
/* Post descriptor */ /* Post descriptor */
init_gni_post_desc (&post_desc->desc, op_type, (intptr_t) local_address, local_handle->gni_handle, init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, endpoint->btl->rdma_local_cq); remote_address, remote_handle->gni_handle, size, endpoint->btl->rdma_local_cq);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock); OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
@ -120,7 +124,7 @@ static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get,
void *local_address, uint64_t remote_address, void *local_address, uint64_t remote_address,
mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_registration_handle_t *remote_handle,
mca_btl_base_rdma_completion_fn_t cbfunc, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata) void *cbcontext, void *cbdata)
{ {
const gni_post_type_t fma_ops[2] = {GNI_POST_FMA_PUT, GNI_POST_FMA_GET}; const gni_post_type_t fma_ops[2] = {GNI_POST_FMA_PUT, GNI_POST_FMA_GET};
@ -128,11 +132,11 @@ static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get,
if (size <= mca_btl_ugni_component.ugni_fma_limit) { if (size <= mca_btl_ugni_component.ugni_fma_limit) {
return mca_btl_ugni_post_fma (endpoint, fma_ops[get], size, local_address, remote_address, return mca_btl_ugni_post_fma (endpoint, fma_ops[get], size, local_address, remote_address,
local_handle, remote_handle, cbfunc, cbcontext, cbdata); local_handle, remote_handle, order, cbfunc, cbcontext, cbdata);
} }
return mca_btl_ugni_post_bte (endpoint, rdma_ops[get], size, local_address, remote_address, return mca_btl_ugni_post_bte (endpoint, rdma_ops[get], size, local_address, remote_address,
local_handle, remote_handle, cbfunc, cbcontext, cbdata); local_handle, remote_handle, order, cbfunc, cbcontext, cbdata);
} }
static inline int mca_btl_ugni_repost (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_post_descriptor_t *post_desc) static inline int mca_btl_ugni_repost (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_post_descriptor_t *post_desc)

Просмотреть файл

@ -98,7 +98,6 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
uint32_t flags, mca_btl_base_tag_t tag, uint32_t flags, mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t **descriptor) mca_btl_base_descriptor_t **descriptor)
{ {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
size_t total_size = header_size + payload_size; size_t total_size = header_size + payload_size;
mca_btl_ugni_base_frag_t *frag = NULL; mca_btl_ugni_base_frag_t *frag = NULL;
size_t packed_size = payload_size; size_t packed_size = payload_size;

Просмотреть файл

@ -115,7 +115,9 @@ struct mca_btl_vader_component_t {
ompi_free_list_t vader_frags_eager; /**< free list of vader send frags */ ompi_free_list_t vader_frags_eager; /**< free list of vader send frags */
ompi_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */ ompi_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */
ompi_free_list_t vader_frags_user; /**< free list of small inline frags */ ompi_free_list_t vader_frags_user; /**< free list of small inline frags */
ompi_free_list_t vader_frags_rdma; /**< free list of vader put/get frags (single-copy) */ #if OPAL_BTL_VADER_HAVE_KNEM
ompi_free_list_t registration_handles; /**< registration handles for knem segments */
#endif
unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */ unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */
unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */ unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */
@ -208,21 +210,24 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
* @param descriptor (IN) Description of the data to be transferred * @param descriptor (IN) Description of the data to be transferred
*/ */
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
int mca_btl_vader_put_xpmem (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
#if OPAL_BTL_VADER_HAVE_CMA #if OPAL_BTL_VADER_HAVE_CMA
int mca_btl_vader_put_cma (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
/** /**
@ -233,21 +238,24 @@ int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl,
* @param descriptor (IN) Description of the data to be transferred * @param descriptor (IN) Description of the data to be transferred
*/ */
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
int mca_btl_vader_get_xpmem (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
#if OPAL_BTL_VADER_HAVE_CMA #if OPAL_BTL_VADER_HAVE_CMA
int mca_btl_vader_get_cma (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int mca_btl_vader_get_knem (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
/** /**
@ -260,6 +268,7 @@ mca_btl_base_descriptor_t* mca_btl_vader_alloc (struct mca_btl_base_module_t* bt
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
uint8_t order, size_t size, uint32_t flags); uint8_t order, size_t size, uint32_t flags);
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -251,7 +251,6 @@ static int mca_btl_vader_component_register (void)
mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */ mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */
} }
mca_btl_vader.super.btl_seg_size = sizeof (mca_btl_vader_segment_t);
mca_btl_vader.super.btl_latency = 1; /* Microsecs */ mca_btl_vader.super.btl_latency = 1; /* Microsecs */
/* Call the BTL based to register its MCA params */ /* Call the BTL based to register its MCA params */
@ -272,7 +271,9 @@ static int mca_btl_vader_component_open(void)
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_rdma, ompi_free_list_t); #if OPAL_BTL_VADER_HAVE_KNEM
OBJ_CONSTRUCT(&mca_btl_vader_component.registration_handles, ompi_free_list_t);
#endif
OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t);
@ -293,7 +294,9 @@ static int mca_btl_vader_component_close(void)
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_rdma); #if OPAL_BTL_VADER_HAVE_KNEM
OBJ_DESTRUCT(&mca_btl_vader_component.registration_handles);
#endif
OBJ_DESTRUCT(&mca_btl_vader_component.lock); OBJ_DESTRUCT(&mca_btl_vader_component.lock);
OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints); OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints);
OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments); OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments);
@ -349,7 +352,6 @@ static void mca_btl_vader_select_next_single_copy_mechanism (void)
static void mca_btl_vader_check_single_copy (void) static void mca_btl_vader_check_single_copy (void)
{ {
int initial_mechanism = mca_btl_vader_component.single_copy_mechanism; int initial_mechanism = mca_btl_vader_component.single_copy_mechanism;
int rc;
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
@ -564,7 +566,7 @@ failed:
void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *endpoint) void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *endpoint)
{ {
mca_btl_base_segment_t segments[2]; mca_btl_base_segment_t segments[2];
mca_btl_base_descriptor_t frag = {.des_local = segments, .des_local_count = 1}; mca_btl_base_descriptor_t frag = {.des_segments = segments, .des_segment_count = 1};
const mca_btl_active_message_callback_t *reg; const mca_btl_active_message_callback_t *reg;
if (hdr->flags & MCA_BTL_VADER_FLAG_COMPLETE) { if (hdr->flags & MCA_BTL_VADER_FLAG_COMPLETE) {
@ -584,7 +586,7 @@ void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_ba
&segments[1].seg_addr.pval); &segments[1].seg_addr.pval);
segments[1].seg_len = hdr->sc_iov.iov_len; segments[1].seg_len = hdr->sc_iov.iov_len;
frag.des_local_count = 2; frag.des_segment_count = 2;
/* recv upcall */ /* recv upcall */
reg->cbfunc(&mca_btl_vader.super, hdr->tag, &frag, reg->cbdata); reg->cbfunc(&mca_btl_vader.super, hdr->tag, &frag, reg->cbdata);

Просмотреть файл

@ -31,11 +31,11 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag)
if(frag->hdr != NULL) { if(frag->hdr != NULL) {
frag->hdr->frag = frag; frag->hdr->frag = frag;
frag->hdr->flags = 0; frag->hdr->flags = 0;
frag->segments[0].base.seg_addr.pval = (char *)(frag->hdr + 1); frag->segments[0].seg_addr.pval = (char *)(frag->hdr + 1);
} }
frag->base.des_local = &frag->segments->base; frag->base.des_segments = frag->segments;
frag->base.des_local_count = 1; frag->base.des_segment_count = 1;
frag->fbox = NULL; frag->fbox = NULL;
} }
@ -65,8 +65,6 @@ void mca_btl_vader_frag_init (ompi_free_list_item_t *item, void *ctx)
frag->my_list = &mca_btl_vader_component.vader_frags_eager; frag->my_list = &mca_btl_vader_component.vader_frags_eager;
} else if (mca_btl_vader.super.btl_max_send_size == data_size) { } else if (mca_btl_vader.super.btl_max_send_size == data_size) {
frag->my_list = &mca_btl_vader_component.vader_frags_max_send; frag->my_list = &mca_btl_vader_component.vader_frags_max_send;
} else {
frag->my_list = &mca_btl_vader_component.vader_frags_rdma;
} }
if (data_size) { if (data_size) {

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -57,15 +57,6 @@ struct mca_btl_vader_hdr_t {
}; };
typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t; typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
struct mca_btl_vader_segment_t {
mca_btl_base_segment_t base;
#if OPAL_BTL_VADER_HAVE_KNEM
uint64_t cookie;
intptr_t registered_base;
#endif
};
typedef struct mca_btl_vader_segment_t mca_btl_vader_segment_t;
/** /**
* shared memory send fragment derived type. * shared memory send fragment derived type.
*/ */
@ -73,7 +64,7 @@ struct mca_btl_vader_frag_t {
/** base object */ /** base object */
mca_btl_base_descriptor_t base; mca_btl_base_descriptor_t base;
/** storage for segment data (max 2) */ /** storage for segment data (max 2) */
mca_btl_vader_segment_t segments[2]; mca_btl_base_segment_t segments[2];
/** endpoint this fragment is active on */ /** endpoint this fragment is active on */
struct mca_btl_base_endpoint_t *endpoint; struct mca_btl_base_endpoint_t *endpoint;
/** fast box in use (or NULL) */ /** fast box in use (or NULL) */
@ -82,9 +73,6 @@ struct mca_btl_vader_frag_t {
mca_btl_vader_hdr_t *hdr; mca_btl_vader_hdr_t *hdr;
/** free list this fragment was allocated within */ /** free list this fragment was allocated within */
ompi_free_list_t *my_list; ompi_free_list_t *my_list;
#if OPAL_BTL_VADER_HAVE_KNEM
uint64_t cookie;
#endif
}; };
typedef struct mca_btl_vader_frag_t mca_btl_vader_frag_t; typedef struct mca_btl_vader_frag_t mca_btl_vader_frag_t;
@ -108,37 +96,16 @@ static inline int mca_btl_vader_frag_alloc (mca_btl_vader_frag_t **frag, ompi_fr
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
static inline int mca_btl_vader_frag_alloc_rdma (mca_btl_vader_frag_t **frag, ompi_free_list_t *list,
struct mca_btl_base_endpoint_t *endpoint) {
ompi_free_list_item_t *item;
OMPI_FREE_LIST_GET_MT(list, item);
*frag = (mca_btl_vader_frag_t *) item;
if (OPAL_LIKELY(NULL != item)) {
(*frag)->endpoint = endpoint;
}
return OPAL_SUCCESS;
}
static inline void mca_btl_vader_frag_return (mca_btl_vader_frag_t *frag) static inline void mca_btl_vader_frag_return (mca_btl_vader_frag_t *frag)
{ {
if (frag->hdr) { if (frag->hdr) {
frag->hdr->flags = 0; frag->hdr->flags = 0;
} }
frag->segments[0].base.seg_addr.pval = (char *)(frag->hdr + 1); frag->segments[0].seg_addr.pval = (char *)(frag->hdr + 1);
frag->base.des_local_count = 1; frag->base.des_segment_count = 1;
frag->fbox = NULL; frag->fbox = NULL;
#if OPAL_BTL_VADER_HAVE_KNEM
if (frag->cookie) {
/* NTH: explicity ignore the return code. Don't care about this cookie anymore anyway. */
(void) ioctl(mca_btl_vader.knem_fd, KNEM_CMD_DESTROY_REGION, &frag->cookie);
frag->cookie = 0;
}
#endif
OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t *)frag); OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t *)frag);
} }
@ -153,9 +120,6 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_frag_t);
#define MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint) \ #define MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint) \
mca_btl_vader_frag_alloc (&(frag), &mca_btl_vader_component.vader_frags_user, endpoint) mca_btl_vader_frag_alloc (&(frag), &mca_btl_vader_component.vader_frags_user, endpoint)
#define MCA_BTL_VADER_FRAG_ALLOC_RDMA(frag, endpoint) \
mca_btl_vader_frag_alloc_rdma (&(frag), &mca_btl_vader_component.vader_frags_rdma, endpoint)
#define MCA_BTL_VADER_FRAG_RETURN(frag) mca_btl_vader_frag_return(frag) #define MCA_BTL_VADER_FRAG_RETURN(frag) mca_btl_vader_frag_return(frag)

Просмотреть файл

@ -33,11 +33,10 @@
* @param descriptor (IN) Description of the data to be transferred * @param descriptor (IN) Description of the data to be transferred
*/ */
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
int mca_btl_vader_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, int mca_btl_vader_get_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
void *local_address, uint64_t remote_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_mpool_base_registration_t *reg; mca_mpool_base_registration_t *reg;
void *rem_ptr; void *rem_ptr;
@ -63,9 +62,10 @@ int mca_btl_vader_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
#endif #endif
#if OPAL_BTL_VADER_HAVE_CMA #if OPAL_BTL_VADER_HAVE_CMA
int mca_btl_vader_get_cma (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
struct iovec src_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size}; struct iovec src_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size};
struct iovec dst_iov = {.iov_base = local_address, .iov_len = size}; struct iovec dst_iov = {.iov_base = local_address, .iov_len = size};
@ -78,36 +78,29 @@ int mca_btl_vader_get_cma (struct mca_btl_base_module_t *btl,
} }
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#endif #endif
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int mca_btl_vader_get_knem (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
mca_btl_vader_segment_t *src = (mca_btl_vader_segment_t *) des->des_remote;
mca_btl_vader_segment_t *dst = (mca_btl_vader_segment_t *) des->des_local;
const size_t size = min(dst->base.seg_len, src->base.seg_len);
intptr_t offset = src->base.seg_addr.lval - src->registered_base;
struct knem_cmd_param_iovec recv_iovec; struct knem_cmd_param_iovec recv_iovec;
struct knem_cmd_inline_copy icopy; struct knem_cmd_inline_copy icopy;
/* Fill in the ioctl data fields. There's no async completion, so /* Fill in the ioctl data fields. There's no async completion, so
we don't need to worry about getting a slot, etc. */ we don't need to worry about getting a slot, etc. */
recv_iovec.base = (uintptr_t) dst->base.seg_addr.lval; recv_iovec.base = (uintptr_t) local_address;
recv_iovec.len = size; recv_iovec.len = size;
icopy.local_iovec_array = (uintptr_t) &recv_iovec; icopy.local_iovec_array = (uintptr_t) &recv_iovec;
icopy.local_iovec_nr = 1; icopy.local_iovec_nr = 1;
icopy.remote_cookie = src->cookie; icopy.remote_cookie = remote_handle->cookie;
icopy.remote_offset = offset; icopy.remote_offset = remote_address - remote_handle->base_addr;
icopy.write = 0; icopy.write = 0;
icopy.flags = 0; icopy.flags = 0;
@ -115,7 +108,7 @@ int mca_btl_vader_get_knem (struct mca_btl_base_module_t *btl,
* is greater than the cutoff. Not that if DMA is not supported * is greater than the cutoff. Not that if DMA is not supported
* or the user specified 0 for knem_dma_min the knem_dma_min was * or the user specified 0 for knem_dma_min the knem_dma_min was
* set to UINT_MAX in mca_btl_vader_knem_init. */ * set to UINT_MAX in mca_btl_vader_knem_init. */
if (mca_btl_vader_component.knem_dma_min <= dst->base.seg_len) { if (mca_btl_vader_component.knem_dma_min <= size) {
icopy.flags = KNEM_FLAG_DMA; icopy.flags = KNEM_FLAG_DMA;
} }
/* synchronous flags only, no need to specify icopy.async_status_index */ /* synchronous flags only, no need to specify icopy.async_status_index */
@ -131,10 +124,7 @@ int mca_btl_vader_get_knem (struct mca_btl_base_module_t *btl,
} }
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -20,6 +20,71 @@
#include "opal/util/show_help.h" #include "opal/util/show_help.h"
struct mca_btl_vader_registration_handle_t {
ompi_free_list_item_t super;
mca_btl_base_registration_handle_t btl_handle;
};
typedef struct mca_btl_vader_registration_handle_t mca_btl_vader_registration_handle_t;
OBJ_CLASS_INSTANCE(mca_btl_vader_registration_handle_t, ompi_free_list_item_t, NULL, NULL);
static mca_btl_base_registration_handle_t *
mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags)
{
mca_btl_vader_registration_handle_t *handle = NULL;
struct knem_cmd_create_region knem_cr;
struct knem_cmd_param_iovec knem_iov;
/* NTH: TODO -- Replace this with just using an mpool once we can pass the
* protection flags through. */
OMPI_FREE_LIST_GET_MT(&mca_btl_vader.registration_handles, &handle);
if (OPAL_UNLIKELY(NULL == handle)) {
return NULL;
}
knem_iov.base = (uintptr_t) base;
knem_iov.len = size;
knem_cr.iovec_array = (uintptr_t) &knem_iov;
knem_cr.iovec_nr = 1;
knem_cr.protection = 0;
if (flags & MCA_BTL_REG_FLAG_REMOTE_READ) {
knem_cr.protection |= PROT_READ;
}
if (flags & MCA_BTL_REG_FLAG_REMOTE_WRITE) {
knem_cr.protection |= PROT_WRITE;
}
/* Vader will explicitly destroy this cookie */
knem_cr.flags = 0;
if (OPAL_UNLIKELY(ioctl(mca_btl_vader.knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) {
OMPI_FREE_LIST_RETURN_MT(&mca_btl_vader.registration_handles, handle);
return NULL;
}
handle->btl_handle.cookie = knem_cr.cookie;
handle->btl_handle.base_addr = (intptr_t) base;
return &handle->btl_handle;
}
static int
mca_btl_vader_deregister_mem_knem (struct mca_btl_base_module_t* btl, struct mca_btl_base_registration_handle_t *handle)
{
mca_btl_vader_registration_handle_t *vader_handle =
(mca_btl_vader_registration_handle_t *)((intptr_t) handle - offsetof (mca_btl_vader_registration_handle_t, btl_handle));
/* NTH: explicity ignore the return code. Don't care about this cookie anymore anyway. */
(void) ioctl(mca_btl_vader.knem_fd, KNEM_CMD_DESTROY_REGION, &vader_handle->cookie);
return OPAL_SUCCESS;
}
int mca_btl_vader_knem_init (void) int mca_btl_vader_knem_init (void)
{ {
struct knem_cmd_info knem_info; struct knem_cmd_info knem_info;
@ -74,6 +139,11 @@ int mca_btl_vader_knem_init (void)
mca_btl_vader.super.btl_get = mca_btl_vader_get_knem; mca_btl_vader.super.btl_get = mca_btl_vader_get_knem;
mca_btl_vader.super.btl_put = mca_btl_vader_put_knem; mca_btl_vader.super.btl_put = mca_btl_vader_put_knem;
/* knem requires registration */
mca_btl_vader.super.btl_register_mem = mca_btl_vader_vader_register_mem_kem;
mca_btl_vader.super.btl_deregister_mem = mca_btl_vader_vader_deregister_mem_kem;
mca_btl_vader.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} while (0); } while (0);

Просмотреть файл

@ -17,6 +17,12 @@
#include <knem_io.h> #include <knem_io.h>
#include <sys/mman.h> #include <sys/mman.h>
/* At this time only knem requires a registration of "RDMA" buffers */
struct mca_btl_base_registration_handle_t {
uint64_t cookie;
intptr_t base_addr;
};
int mca_btl_vader_knem_init (void); int mca_btl_vader_knem_init (void);
int mca_btl_vader_knem_fini (void); int mca_btl_vader_knem_fini (void);
int mca_btl_vader_knem_progress (void); int mca_btl_vader_knem_progress (void);

Просмотреть файл

@ -48,7 +48,6 @@ static int vader_free (struct mca_btl_base_module_t* btl, mca_btl_base_descripto
static struct mca_btl_base_descriptor_t *vader_prepare_src ( static struct mca_btl_base_descriptor_t *vader_prepare_src (
struct mca_btl_base_module_t *btl, struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -98,19 +97,19 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
/* initialize fragment descriptor free lists */ /* initialize fragment descriptor free lists */
/* initialize free list for single copy (get, put) */ /* initialize free list for single copy (get, put) */
if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) { #if OPAL_BTL_VADER_HAVE_KNEM
rc = ompi_free_list_init_ex_new (&component->vader_frags_rdma, if (MCA_BTL_VADER_KNEM != mca_btl_vader_component.single_copy_mechanism) {
sizeof(mca_btl_vader_frag_t), 8, rc = ompi_free_list_init_new (&component->registration_handles,
OBJ_CLASS(mca_btl_vader_frag_t), sizeof(mca_btl_vader_registration_handle_t), 8,
0, opal_cache_line_size, OBJ_CLASS(mca_btl_vader_registration_handle_t),
component->vader_free_list_num, 0, 8, component->vader_free_list_num,
component->vader_free_list_max, component->vader_free_list_max,
component->vader_free_list_inc, component->vader_free_list_inc, NULL);
NULL, mca_btl_vader_frag_init, (void *) 0);
if (OPAL_SUCCESS != rc) { if (OPAL_SUCCESS != rc) {
return rc; return rc;
} }
} }
#endif
/* initialize free list for small send and inline fragments */ /* initialize free list for small send and inline fragments */
rc = ompi_free_list_init_ex_new(&component->vader_frags_user, rc = ompi_free_list_init_ex_new(&component->vader_frags_user,
@ -407,7 +406,7 @@ mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl
} }
if (OPAL_LIKELY(frag != NULL)) { if (OPAL_LIKELY(frag != NULL)) {
frag->segments[0].base.seg_len = size; frag->segments[0].seg_len = size;
frag->base.des_flags = flags; frag->base.des_flags = flags;
frag->base.order = order; frag->base.order = order;
@ -436,7 +435,6 @@ static int vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descripto
*/ */
static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl, static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size, uint8_t order, size_t reserve, size_t *size,
uint32_t flags) uint32_t flags)
@ -449,7 +447,6 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
opal_convertor_get_current_pointer (convertor, &data_ptr); opal_convertor_get_current_pointer (convertor, &data_ptr);
if (OPAL_LIKELY(reserve)) {
/* in place send fragment */ /* in place send fragment */
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) { if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
uint32_t iov_count = 1; uint32_t iov_count = 1;
@ -468,7 +465,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
iov.iov_len = *size; iov.iov_len = *size;
iov.iov_base = iov.iov_base =
(IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].base.seg_addr.pval)) + (IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
reserve); reserve);
rc = opal_convertor_pack (convertor, &iov, &iov_count, size); rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
@ -477,7 +474,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
return NULL; return NULL;
} }
frag->segments[0].base.seg_len = *size + reserve; frag->segments[0].seg_len = *size + reserve;
} else { } else {
if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) { if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
if (OPAL_LIKELY(total_size <= mca_btl_vader.super.btl_eager_limit)) { if (OPAL_LIKELY(total_size <= mca_btl_vader.super.btl_eager_limit)) {
@ -503,10 +500,10 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
frag->hdr->sc_iov.iov_base = data_ptr; frag->hdr->sc_iov.iov_base = data_ptr;
frag->hdr->sc_iov.iov_len = *size; frag->hdr->sc_iov.iov_len = *size;
frag->segments[0].base.seg_len = reserve; frag->segments[0].seg_len = reserve;
frag->segments[1].base.seg_len = *size; frag->segments[1].seg_len = *size;
frag->segments[1].base.seg_addr.pval = data_ptr; frag->segments[1].seg_addr.pval = data_ptr;
frag->base.des_local_count = 2; frag->base.des_segment_count = 2;
} else { } else {
#endif #endif
@ -516,56 +513,19 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
* fragment does not belong to the caller */ * fragment does not belong to the caller */
fbox = mca_btl_vader_reserve_fbox (endpoint, total_size); fbox = mca_btl_vader_reserve_fbox (endpoint, total_size);
if (OPAL_LIKELY(fbox)) { if (OPAL_LIKELY(fbox)) {
frag->segments[0].base.seg_addr.pval = fbox; frag->segments[0].seg_addr.pval = fbox;
} }
frag->fbox = fbox; frag->fbox = fbox;
} }
/* NTH: the covertor adds some latency so we bypass it here */ /* NTH: the covertor adds some latency so we bypass it here */
memcpy ((void *)((uintptr_t)frag->segments[0].base.seg_addr.pval + reserve), data_ptr, *size); memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve), data_ptr, *size);
frag->segments[0].base.seg_len = total_size; frag->segments[0].seg_len = total_size;
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
} }
#endif #endif
} }
} else {
/* put/get fragment */
if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
(void) MCA_BTL_VADER_FRAG_ALLOC_RDMA(frag, endpoint);
} else {
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint);
}
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segments[0].base.seg_len = total_size;
#if OPAL_BTL_VADER_HAVE_KNEM
if (MCA_BTL_VADER_KNEM == mca_btl_vader_component.single_copy_mechanism) {
struct knem_cmd_create_region knem_cr;
struct knem_cmd_param_iovec knem_iov;
knem_iov.base = (uintptr_t) data_ptr;
knem_iov.len = total_size;
knem_cr.iovec_array = (uintptr_t) &knem_iov;
knem_cr.iovec_nr = 1;
knem_cr.protection = PROT_READ;
/* Vader will explicitly destroy this cookie */
knem_cr.flags = 0;
if (OPAL_UNLIKELY(ioctl(mca_btl_vader.knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) {
MCA_BTL_VADER_FRAG_RETURN(frag);
return NULL;
}
frag->segments[0].cookie = knem_cr.cookie;
frag->segments[0].registered_base = (intptr_t) data_ptr;
frag->cookie = knem_cr.cookie;
}
#endif /* OPAL_BTL_SM_HAVE_KNEM */
}
frag->base.order = order; frag->base.order = order;
frag->base.des_flags = flags; frag->base.des_flags = flags;
@ -583,6 +543,9 @@ static int vader_ft_event (int state)
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#if OPAL_BTL_VADER_HAVE_KNEM
#endif
static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep) static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep)
{ {
OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t); OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t);

Просмотреть файл

@ -35,11 +35,10 @@
* @param descriptor (IN) Description of the data to be transferred * @param descriptor (IN) Description of the data to be transferred
*/ */
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
int mca_btl_vader_put (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, int mca_btl_vader_put_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
void *local_address, uint64_t remote_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *local_handle, mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_mpool_base_registration_t *reg; mca_mpool_base_registration_t *reg;
void *rem_ptr; void *rem_ptr;
@ -61,9 +60,10 @@ int mca_btl_vader_put (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
#endif #endif
#if OPAL_BTL_VADER_HAVE_CMA #if OPAL_BTL_VADER_HAVE_CMA
int mca_btl_vader_put_cma (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
struct iovec src_iov = {.iov_base = local_address, .iov_len = size}; struct iovec src_iov = {.iov_base = local_address, .iov_len = size};
struct iovec dst_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size}; struct iovec dst_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size};
@ -76,36 +76,29 @@ int mca_btl_vader_put_cma (struct mca_btl_base_module_t *btl,
} }
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#endif #endif
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
mca_btl_vader_segment_t *src = (mca_btl_vader_segment_t *) des->des_local;
mca_btl_vader_segment_t *dst = (mca_btl_vader_segment_t *) des->des_remote;
const size_t size = min(dst->base.seg_len, src->base.seg_len);
intptr_t offset = dst->base.seg_addr.lval - dst->registered_base;
struct knem_cmd_param_iovec send_iovec; struct knem_cmd_param_iovec send_iovec;
struct knem_cmd_inline_copy icopy; struct knem_cmd_inline_copy icopy;
/* Fill in the ioctl data fields. There's no async completion, so /* Fill in the ioctl data fields. There's no async completion, so
we don't need to worry about getting a slot, etc. */ we don't need to worry about getting a slot, etc. */
send_iovec.base = (uintptr_t) src->base.seg_addr.lval; send_iovec.base = (uintptr_t) local_address;
send_iovec.len = size; send_iovec.len = size;
icopy.local_iovec_array = (uintptr_t) &send_iovec; icopy.local_iovec_array = (uintptr_t) &send_iovec;
icopy.local_iovec_nr = 1; icopy.local_iovec_nr = 1;
icopy.remote_cookie = dst->cookie; icopy.remote_cookie = remote_handle->cookie;
icopy.remote_offset = offset; icopy.remote_offset = remote_address - remote_handle->base_addr;
icopy.write = 1; icopy.write = 1;
icopy.flags = 0; icopy.flags = 0;
@ -113,7 +106,7 @@ int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl,
* is greater than the cutoff. Not that if DMA is not supported * is greater than the cutoff. Not that if DMA is not supported
* or the user specified 0 for knem_dma_min the knem_dma_min was * or the user specified 0 for knem_dma_min the knem_dma_min was
* set to UINT_MAX in mca_btl_vader_knem_init. */ * set to UINT_MAX in mca_btl_vader_knem_init. */
if (mca_btl_vader_component.knem_dma_min <= dst->base.seg_len) { if (mca_btl_vader_component.knem_dma_min <= size) {
icopy.flags = KNEM_FLAG_DMA; icopy.flags = KNEM_FLAG_DMA;
} }
/* synchronous flags only, no need to specify icopy.async_status_index */ /* synchronous flags only, no need to specify icopy.async_status_index */
@ -129,10 +122,7 @@ int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl,
} }
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -40,7 +40,7 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag) mca_btl_base_tag_t tag)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor; mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
const size_t total_size = frag->segments[0].base.seg_len; const size_t total_size = frag->segments[0].seg_len;
if (OPAL_LIKELY(frag->fbox)) { if (OPAL_LIKELY(frag->fbox)) {
mca_btl_vader_fbox_send (frag->fbox, tag); mca_btl_vader_fbox_send (frag->fbox, tag);

Просмотреть файл

@ -78,7 +78,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
frag->hdr->tag = tag; frag->hdr->tag = tag;
/* write the match header (with MPI comm/tag/etc. info) */ /* write the match header (with MPI comm/tag/etc. info) */
memcpy (frag->segments[0].base.seg_addr.pval, header, header_size); memcpy (frag->segments[0].seg_addr.pval, header, header_size);
/* write the message data if there is any */ /* write the message data if there is any */
/* we can't use single-copy semantics here since as caller will consider the send /* we can't use single-copy semantics here since as caller will consider the send
@ -88,7 +88,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
struct iovec iov; struct iovec iov;
/* pack the data into the supplied buffer */ /* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].base.seg_addr.pval + header_size); iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
iov.iov_len = length = payload_size; iov.iov_len = length = payload_size;
(void) opal_convertor_pack (convertor, &iov, &iov_count, &length); (void) opal_convertor_pack (convertor, &iov, &iov_count, &length);