* implement RDMA put
* remove the recv fragment code, since it really isn't needed * handle memory descriptor binding a bit more sanely, and use thresholds so that Portals does the unlink for us, when it feels like it. This commit was SVN r6575.
Этот коммит содержится в:
родитель
7642e24de2
Коммит
0ee12467d6
@ -219,23 +219,23 @@ mca_btl_portals_register(struct mca_btl_base_module_t* btl,
|
||||
|
||||
|
||||
mca_btl_base_descriptor_t*
|
||||
mca_btl_portals_alloc(struct mca_btl_base_module_t* btl,
|
||||
mca_btl_portals_alloc(struct mca_btl_base_module_t* btl_base,
|
||||
size_t size)
|
||||
{
|
||||
mca_btl_portals_module_t* portals_btl = (mca_btl_portals_module_t*) btl;
|
||||
mca_btl_portals_module_t* btl = (mca_btl_portals_module_t*) btl_base;
|
||||
mca_btl_portals_frag_t* frag;
|
||||
int rc;
|
||||
|
||||
if (size <= btl->btl_eager_limit) {
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(portals_btl, frag, rc);
|
||||
if (size <= btl->super.btl_eager_limit) {
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||
frag->segment.seg_len =
|
||||
size <= btl->btl_eager_limit ?
|
||||
size : btl->btl_eager_limit ;
|
||||
size <= btl->super.btl_eager_limit ?
|
||||
size : btl->super.btl_eager_limit ;
|
||||
} else {
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(portals_btl, frag, rc);
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(btl, frag, rc);
|
||||
frag->segment.seg_len =
|
||||
size <= btl->btl_max_send_size ?
|
||||
size : btl->btl_max_send_size ;
|
||||
size <= btl->super.btl_max_send_size ?
|
||||
size : btl->super.btl_max_send_size ;
|
||||
}
|
||||
|
||||
frag->base.des_flags = 0;
|
||||
@ -265,60 +265,78 @@ mca_btl_portals_free(struct mca_btl_base_module_t* btl_base,
|
||||
}
|
||||
|
||||
|
||||
/* BWB - fix me - this needs to do RDMA when we get there... */
|
||||
mca_btl_base_descriptor_t*
|
||||
mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl,
|
||||
mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
struct mca_btl_base_endpoint_t* peer,
|
||||
mca_mpool_base_registration_t* registration,
|
||||
struct ompi_convertor_t* convertor,
|
||||
size_t reserve,
|
||||
size_t* size)
|
||||
{
|
||||
mca_btl_portals_module_t* btl = (mca_btl_portals_module_t*) btl_base;
|
||||
mca_btl_portals_frag_t* frag;
|
||||
size_t max_data = *size;
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
int32_t free_after;
|
||||
int rc;
|
||||
int ret;
|
||||
|
||||
if (max_data+reserve <= btl->btl_eager_limit) {
|
||||
if (0 == reserve && 0 == ompi_convertor_need_buffers(convertor)) {
|
||||
/* we can send right out of the buffer (woo!). */
|
||||
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_USER(&btl->super, frag, ret);
|
||||
if(NULL == frag){
|
||||
return NULL;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
ompi_convertor_pack(convertor, &iov, &iov_count, &max_data,
|
||||
&free_after);
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
|
||||
} else if (max_data+reserve <= btl->super.btl_eager_limit) {
|
||||
/*
|
||||
* if we aren't pinning the data and the requested size is less
|
||||
* than the eager limit pack into a fragment from the eager pool
|
||||
* if we can't send out of the buffer directly and the
|
||||
* requested size is less than the eager limit, pack into a
|
||||
* fragment from the eager pool
|
||||
*/
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(btl, frag, ret);
|
||||
if (NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count,
|
||||
ret = ompi_convertor_pack(convertor, &iov, &iov_count,
|
||||
&max_data, &free_after);
|
||||
*size = max_data;
|
||||
if (rc < 0) {
|
||||
if (ret < 0) {
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_EAGER(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
|
||||
} else {
|
||||
/*
|
||||
* otherwise pack as much data as we can into a fragment
|
||||
* that is the max send size.
|
||||
*/
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(btl, frag, rc);
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(btl, frag, ret);
|
||||
if (NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
if (max_data + reserve > btl->btl_max_send_size){
|
||||
max_data = btl->btl_max_send_size - reserve;
|
||||
if (max_data + reserve > btl->super.btl_max_send_size){
|
||||
max_data = btl->super.btl_max_send_size - reserve;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
|
||||
rc = ompi_convertor_pack(convertor, &iov, &iov_count,
|
||||
ret = ompi_convertor_pack(convertor, &iov, &iov_count,
|
||||
&max_data, &free_after);
|
||||
*size = max_data;
|
||||
if ( rc < 0 ) {
|
||||
if ( ret < 0 ) {
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_MAX(btl, frag);
|
||||
return NULL;
|
||||
}
|
||||
@ -343,11 +361,74 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
|
||||
size_t reserve,
|
||||
size_t* size)
|
||||
{
|
||||
/* BWB - FIXME - Implement prepare_dst */
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Warning: call to unimplemented function prepare_dst");
|
||||
struct mca_btl_portals_module_t *btl =
|
||||
(struct mca_btl_portals_module_t *) btl_base;
|
||||
mca_btl_portals_frag_t* frag;
|
||||
ptl_md_t md;
|
||||
ptl_handle_me_t me_h;
|
||||
ptl_handle_md_t md_h;
|
||||
int ret;
|
||||
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_USER(&btl->super, frag, ret);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segment.seg_len = *size;
|
||||
frag->segment.seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;
|
||||
frag->segment.seg_key.key64 = OPAL_THREAD_ADD64(&(btl->portals_rdma_key), 1);
|
||||
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma match posted for frag 0x%x, callback 0x%x, bits %lld",
|
||||
frag, frag->base.des_cbfunc, frag->segment.seg_key.key64));
|
||||
|
||||
/* create a match entry */
|
||||
ret = PtlMEAttach(btl->portals_ni_h,
|
||||
OMPI_BTL_PORTALS_RDMA_TABLE_ID,
|
||||
peer->endpoint_ptl_id,
|
||||
frag->segment.seg_key.key64, /* match */
|
||||
0, /* ignore */
|
||||
PTL_UNLINK,
|
||||
PTL_INS_AFTER,
|
||||
&me_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Error creating recv reject ME: %d", ret);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&btl->super, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* setup the memory descriptor. RDMA should never need to be
|
||||
retransmitted, so we set the threshold for the 2 events it will
|
||||
receive (PUT/GET START and END). No need to track the unlinks
|
||||
later :) */
|
||||
md.start = frag->segment.seg_addr.pval;
|
||||
md.length = frag->segment.seg_len;
|
||||
md.threshold = 2; /* unlink after START / END */
|
||||
md.max_size = 0;
|
||||
md.options = PTL_MD_OP_PUT | PTL_MD_OP_GET;
|
||||
md.user_ptr = frag; /* keep a pointer to ourselves */
|
||||
md.eq_handle = btl->portals_eq_handles[OMPI_BTL_PORTALS_EQ_RDMA];
|
||||
|
||||
ret = PtlMDAttach(me_h,
|
||||
md,
|
||||
PTL_UNLINK,
|
||||
&md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Error creating recv reject ME: %d", ret);
|
||||
PtlMEUnlink(me_h);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&btl->super, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
|
||||
|
@ -117,6 +117,9 @@ struct mca_btl_portals_module_t {
|
||||
/* queued sends */
|
||||
opal_list_t portals_queued_sends;
|
||||
|
||||
/* key to use for next rdma operation */
|
||||
volatile int64_t portals_rdma_key;
|
||||
|
||||
/* our portals network interface */
|
||||
ptl_handle_ni_t portals_ni_h;
|
||||
/* the limits returned from PtlNIInit for interface */
|
||||
|
@ -165,7 +165,7 @@ mca_btl_portals_component_open(void)
|
||||
mca_btl_portals_module.super.btl_bandwidth =
|
||||
param_register_int("bandwidth", 1000);
|
||||
|
||||
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_SEND;
|
||||
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_RDMA;
|
||||
|
||||
bzero(&(mca_btl_portals_module.portals_reg),
|
||||
sizeof(mca_btl_portals_module.portals_reg));
|
||||
@ -202,6 +202,7 @@ mca_btl_portals_component_open(void)
|
||||
mca_btl_portals_module.portals_ni_h = PTL_INVALID_HANDLE;
|
||||
mca_btl_portals_module.portals_sr_dropped = 0;
|
||||
mca_btl_portals_module.portals_outstanding_sends = 0;
|
||||
mca_btl_portals_module.portals_rdma_key = 1;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -31,8 +31,6 @@ mca_btl_portals_frag_common_send_constructor(mca_btl_portals_frag_t* frag)
|
||||
frag->segment.seg_addr.pval = frag + 1;
|
||||
frag->segment.seg_len = frag->size;
|
||||
frag->segment.seg_key.key64 = 0;
|
||||
|
||||
frag->type = OMPI_BTL_PORTALS_FRAG_SEND;
|
||||
}
|
||||
|
||||
|
||||
@ -61,8 +59,6 @@ mca_btl_portals_frag_user_constructor(mca_btl_portals_frag_t* frag)
|
||||
frag->base.des_src = 0;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->size = 0;
|
||||
|
||||
frag->type = OMPI_BTL_PORTALS_FRAG_SEND;
|
||||
}
|
||||
|
||||
|
||||
|
@ -22,37 +22,18 @@ extern "C" {
|
||||
#endif
|
||||
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_portals_frag_t);
|
||||
|
||||
typedef enum {
|
||||
OMPI_BTL_PORTALS_FRAG_SEND,
|
||||
OMPI_BTL_PORTALS_FRAG_RECV
|
||||
} mca_btl_portals_frag_type_t;
|
||||
|
||||
struct mca_btl_portals_send_frag_t {
|
||||
struct mca_btl_portals_module_t *btl;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_base_header_t hdr;
|
||||
};
|
||||
typedef struct mca_btl_portals_send_frag_t mca_btl_portals_send_frag_t;
|
||||
|
||||
struct mca_btl_portals_recv_frag_t {
|
||||
struct mca_btl_portals_recv_chunk_t *chunk;
|
||||
};
|
||||
typedef struct mca_btl_portals_recv_frag_t mca_btl_portals_recv_frag_t;
|
||||
|
||||
|
||||
/**
|
||||
* PORTALS send fraportalsent derived type.
|
||||
* Portals send fragment derived type
|
||||
*/
|
||||
struct mca_btl_portals_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_base_segment_t segment;
|
||||
mca_btl_portals_frag_type_t type;
|
||||
struct mca_btl_portals_module_t *btl;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_base_header_t hdr;
|
||||
size_t size;
|
||||
|
||||
union {
|
||||
mca_btl_portals_send_frag_t send_frag;
|
||||
mca_btl_portals_recv_frag_t recv_frag;
|
||||
} u;
|
||||
};
|
||||
typedef struct mca_btl_portals_frag_t mca_btl_portals_frag_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_portals_frag_t);
|
||||
@ -76,45 +57,48 @@ OBJ_CLASS_DECLARATION(mca_btl_portals_frag_user_t);
|
||||
* free list(s).
|
||||
*/
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(btl, frag, rc) \
|
||||
#define OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(btl_macro, frag, rc) \
|
||||
{ \
|
||||
\
|
||||
opal_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT(&((mca_btl_portals_module_t*)btl)->portals_frag_eager, item, rc); \
|
||||
OMPI_FREE_LIST_WAIT(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_eager, item, rc); \
|
||||
frag = (mca_btl_portals_frag_t*) item; \
|
||||
frag->btl = (mca_btl_portals_module_t*) btl_macro; \
|
||||
}
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_EAGER(btl, frag) \
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_EAGER(btl_macro, frag) \
|
||||
{ \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl)->portals_frag_eager, \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_eager, \
|
||||
(opal_list_item_t*)(frag)); \
|
||||
}
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(btl, frag, rc) \
|
||||
#define OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(btl_macro, frag, rc) \
|
||||
{ \
|
||||
\
|
||||
opal_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT(&((mca_btl_portals_module_t*)btl)->portals_frag_max, item, rc); \
|
||||
OMPI_FREE_LIST_WAIT(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_max, item, rc); \
|
||||
frag = (mca_btl_portals_frag_t*) item; \
|
||||
frag->btl = (mca_btl_portals_module_t*) btl_macro; \
|
||||
}
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_MAX(btl, frag) \
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_MAX(btl_macro, frag) \
|
||||
{ \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl)->portals_frag_max, \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_max, \
|
||||
(opal_list_item_t*)(frag)); \
|
||||
}
|
||||
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_ALLOC_USER(btl, frag, rc) \
|
||||
#define OMPI_BTL_PORTALS_FRAG_ALLOC_USER(btl_macro, frag, rc) \
|
||||
{ \
|
||||
opal_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT(&((mca_btl_portals_module_t*)btl)->portals_frag_user, item, rc); \
|
||||
OMPI_FREE_LIST_WAIT(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_user, item, rc); \
|
||||
frag = (mca_btl_portals_frag_t*) item; \
|
||||
frag->btl = (mca_btl_portals_module_t*) btl_macro; \
|
||||
}
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_USER(btl, frag) \
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_USER(btl_macro, frag) \
|
||||
{ \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl)->portals_frag_user, \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_user, \
|
||||
(opal_list_item_t*)(frag)); \
|
||||
}
|
||||
|
||||
|
@ -21,12 +21,121 @@
|
||||
|
||||
#include "btl_portals.h"
|
||||
#include "btl_portals_rdma.h"
|
||||
#include "btl_portals_frag.h"
|
||||
|
||||
|
||||
int
|
||||
mca_btl_portals_process_rdma(mca_btl_portals_module_t *module,
|
||||
mca_btl_portals_process_rdma(mca_btl_portals_module_t *btl,
|
||||
ptl_event_t *ev)
|
||||
{
|
||||
mca_btl_portals_frag_t *frag =
|
||||
(mca_btl_portals_frag_t*) ev->md.user_ptr;
|
||||
|
||||
switch (ev->type) {
|
||||
case PTL_EVENT_SEND_START:
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma: PTL_EVENT_SEND_START for 0x%x",
|
||||
frag));
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to start rdma send event\n");
|
||||
/* unlink, since we don't expect to get an end or ack */
|
||||
PtlMDUnlink(ev->md_handle);
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_ERROR);
|
||||
}
|
||||
break;
|
||||
|
||||
case PTL_EVENT_SEND_END:
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma: PTL_EVENT_SEND_END for 0x%x",
|
||||
frag));
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to end rdma send event\n");
|
||||
/* unlink, since we don't expect to get an ack */
|
||||
PtlMDUnlink(ev->md_handle);
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_ERROR);
|
||||
}
|
||||
break;
|
||||
|
||||
case PTL_EVENT_ACK:
|
||||
/* ok, this is the real work - the message has been received
|
||||
on the other side. */
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma: PTL_EVENT_ACK for 0x%x, Ox%x",
|
||||
frag, frag->base.des_cbfunc));
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure in rdma send event ack\n");
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_ERROR);
|
||||
} else {
|
||||
assert(ev->mlength == frag->segment.seg_len);
|
||||
|
||||
/* let the PML know we're done... */
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_SUCCESS);
|
||||
}
|
||||
break;
|
||||
|
||||
case PTL_EVENT_PUT_START:
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma: PTL_EVENT_PUT_START for 0x%x",
|
||||
frag));
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure in rdma put start\n");
|
||||
/* unlink, since we don't expect to get an end */
|
||||
PtlMDUnlink(ev->md_handle);
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_ERROR);
|
||||
}
|
||||
break;
|
||||
|
||||
case PTL_EVENT_PUT_END:
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma: PTL_EVENT_PUT_END for 0x%x, Ox%x",
|
||||
frag, frag->base.des_cbfunc));
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure in rdma put end\n");
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_ERROR);
|
||||
} else {
|
||||
assert(ev->mlength == frag->segment.seg_len);
|
||||
#if 0
|
||||
/* let the PML know we're done... */
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_SUCCESS);
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -34,11 +143,55 @@ mca_btl_portals_process_rdma(mca_btl_portals_module_t *module,
|
||||
int
|
||||
mca_btl_portals_put(struct mca_btl_base_module_t* btl_base,
|
||||
struct mca_btl_base_endpoint_t* btl_peer,
|
||||
struct mca_btl_base_descriptor_t* decriptor)
|
||||
struct mca_btl_base_descriptor_t* descriptor)
|
||||
{
|
||||
mca_btl_portals_frag_t *frag = (mca_btl_portals_frag_t*) descriptor;
|
||||
ptl_md_t md;
|
||||
ptl_handle_md_t md_h;
|
||||
int ret;
|
||||
|
||||
frag->endpoint = btl_peer;
|
||||
|
||||
/* setup the send */
|
||||
md.start = frag->segment.seg_addr.pval;
|
||||
md.length = frag->segment.seg_len;
|
||||
md.threshold = 3; /* unlink after start, end, ack */
|
||||
md.max_size = 0;
|
||||
md.options = 0;
|
||||
md.user_ptr = frag; /* keep a pointer to ourselves */
|
||||
md.eq_handle = frag->btl->portals_eq_handles[OMPI_BTL_PORTALS_EQ_RDMA];
|
||||
|
||||
/* make a free-floater */
|
||||
ret = PtlMDBind(frag->btl->portals_ni_h,
|
||||
md,
|
||||
PTL_UNLINK,
|
||||
&md_h);
|
||||
if (ret != PTL_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Warning: call to unimplemented function put()");
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
"PtlMDBind failed with error %d", ret);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma put called for frag 0x%x, callback 0x%xbits %lld",
|
||||
frag, frag->base.des_cbfunc, frag->base.des_dst[0].seg_key.key64));
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
PTL_ACK_REQ,
|
||||
btl_peer->endpoint_ptl_id,
|
||||
OMPI_BTL_PORTALS_RDMA_TABLE_ID,
|
||||
0, /* ac_index - not used*/
|
||||
frag->base.des_dst[0].seg_key.key64, /* match bits */
|
||||
0, /* remote offset - not used */
|
||||
frag->hdr.tag); /* hdr_data - tag */
|
||||
if (ret != PTL_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"PtlPut failed with error %d", ret);
|
||||
PtlMDUnlink(md_h);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
@ -177,6 +177,7 @@ mca_btl_portals_process_recv(mca_btl_portals_module_t *btl,
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to start event\n");
|
||||
} else {
|
||||
/* increase reference count on the memory chunk */
|
||||
OPAL_THREAD_ADD32(&(chunk->pending), 1);
|
||||
}
|
||||
break;
|
||||
@ -188,7 +189,7 @@ mca_btl_portals_process_recv(mca_btl_portals_module_t *btl,
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to end event\n");
|
||||
OPAL_THREAD_ADD32(&(chunk->pending), -1);
|
||||
mca_btl_portals_return_chunk_part(btl, chunk);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -196,9 +197,10 @@ mca_btl_portals_process_recv(mca_btl_portals_module_t *btl,
|
||||
OPAL_OUTPUT_VERBOSE((95, mca_btl_portals_component.portals_output,
|
||||
"received data for tag %d\n", tag));
|
||||
|
||||
/* it's a user, so we have to manually setup the segment */
|
||||
/* grab a user fragment (since memory is already allocated in
|
||||
as part of the chunk), fill in the right bits, and call the
|
||||
callback */
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_USER(btl, frag, ret);
|
||||
frag->type = OMPI_BTL_PORTALS_FRAG_RECV;
|
||||
frag->size = ev->mlength;
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
@ -207,9 +209,6 @@ mca_btl_portals_process_recv(mca_btl_portals_module_t *btl,
|
||||
|
||||
frag->segment.seg_addr.pval = (((char*) ev->md.start) + ev->offset);
|
||||
frag->segment.seg_len = frag->size;
|
||||
frag->segment.seg_key.key64 = 0;
|
||||
|
||||
frag->u.recv_frag.chunk = chunk;
|
||||
|
||||
if (ev->md.length - (ev->offset + ev->mlength) < ev->md.max_size) {
|
||||
/* the chunk is full. It's deactivated automagically, but we
|
||||
@ -224,8 +223,8 @@ mca_btl_portals_process_recv(mca_btl_portals_module_t *btl,
|
||||
tag,
|
||||
&frag->base,
|
||||
btl->portals_reg[tag].cbdata);
|
||||
mca_btl_portals_return_chunk_part(btl, frag);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&btl->super, frag);
|
||||
mca_btl_portals_return_chunk_part(btl, chunk);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -124,9 +124,8 @@ mca_btl_portals_activate_chunk(mca_btl_portals_recv_chunk_t *chunk)
|
||||
|
||||
static inline void
|
||||
mca_btl_portals_return_chunk_part(mca_btl_portals_module_t *btl,
|
||||
mca_btl_portals_frag_t *frag)
|
||||
mca_btl_portals_recv_chunk_t *chunk)
|
||||
{
|
||||
mca_btl_portals_recv_chunk_t *chunk = frag->u.recv_frag.chunk;
|
||||
int ret;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((100, mca_btl_portals_component.portals_output,
|
||||
|
@ -41,8 +41,10 @@ mca_btl_portals_process_send(mca_btl_portals_module_t *btl,
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to start send event\n");
|
||||
/* unlink, since we don't expect to get an end or ack */
|
||||
PtlMDUnlink(ev->md_handle);
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->u.send_frag.endpoint,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_ERROR);
|
||||
}
|
||||
@ -55,8 +57,10 @@ mca_btl_portals_process_send(mca_btl_portals_module_t *btl,
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to end send event\n");
|
||||
/* unlink, since we don't expect to get an ack */
|
||||
PtlMDUnlink(ev->md_handle);
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->u.send_frag.endpoint,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_ERROR);
|
||||
}
|
||||
@ -69,17 +73,17 @@ mca_btl_portals_process_send(mca_btl_portals_module_t *btl,
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"send: PTL_EVENT_ACK for 0x%x, Ox%x",
|
||||
frag, frag->base.des_cbfunc));
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to end send event\n");
|
||||
"Failure in send event ack\n");
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->u.send_frag.endpoint,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_ERROR);
|
||||
} else if (0 == ev->mlength) {
|
||||
/* other side did not receive the message */
|
||||
|
||||
/* BWB - implement check for retransmit */
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"message was dropped. Adding to front of queue list");
|
||||
opal_list_prepend(&(btl->portals_queued_sends),
|
||||
@ -88,17 +92,12 @@ mca_btl_portals_process_send(mca_btl_portals_module_t *btl,
|
||||
} else {
|
||||
/* the other side received the message */
|
||||
OPAL_THREAD_ADD32(&btl->portals_outstanding_sends, -1);
|
||||
/* we're done with the md - return it. Do this before
|
||||
anything else in case the PML releases resources, then
|
||||
gets more resources (ie, what's currently in this
|
||||
md) */
|
||||
PtlMDUnlink(ev->md_handle);
|
||||
|
||||
assert(ev->mlength == frag->segment.seg_len);
|
||||
|
||||
/* let the PML know we're done... */
|
||||
frag->base.des_cbfunc(&btl->super,
|
||||
frag->u.send_frag.endpoint,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
OMPI_SUCCESS);
|
||||
|
||||
@ -131,13 +130,11 @@ mca_btl_portals_send(struct mca_btl_base_module_t* btl_base,
|
||||
int32_t num_sends;
|
||||
int ret;
|
||||
|
||||
frag->u.send_frag.endpoint = endpoint;
|
||||
frag->u.send_frag.hdr.tag = tag;
|
||||
frag->u.send_frag.btl = btl;
|
||||
frag->endpoint = endpoint;
|
||||
frag->hdr.tag = tag;
|
||||
|
||||
num_sends = OPAL_THREAD_ADD32(&btl->portals_outstanding_sends, 1);
|
||||
|
||||
/* BWB - implement check for too many pending messages */
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"send called for frag 0x%x, 0x%x",
|
||||
frag, frag->base.des_cbfunc));
|
||||
|
@ -33,14 +33,14 @@ mca_btl_portals_send_frag(mca_btl_portals_frag_t *frag)
|
||||
/* setup the send */
|
||||
md.start = frag->segment.seg_addr.pval;
|
||||
md.length = frag->segment.seg_len;
|
||||
md.threshold = PTL_MD_THRESH_INF; /* unlink based on protocol */
|
||||
md.threshold = 3; /* unlink after start, end, ack */
|
||||
md.max_size = 0;
|
||||
md.options = 0; /* BWB - can we optimize? */
|
||||
md.user_ptr = frag; /* keep a pointer to ourselves */
|
||||
md.eq_handle = frag->u.send_frag.btl->portals_eq_handles[OMPI_BTL_PORTALS_EQ_SEND];
|
||||
md.eq_handle = frag->btl->portals_eq_handles[OMPI_BTL_PORTALS_EQ_SEND];
|
||||
|
||||
/* make a free-floater */
|
||||
ret = PtlMDBind(frag->u.send_frag.btl->portals_ni_h,
|
||||
ret = PtlMDBind(frag->btl->portals_ni_h,
|
||||
md,
|
||||
PTL_UNLINK,
|
||||
&md_h);
|
||||
@ -52,12 +52,12 @@ mca_btl_portals_send_frag(mca_btl_portals_frag_t *frag)
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
PTL_ACK_REQ,
|
||||
frag->u.send_frag.endpoint->endpoint_ptl_id,
|
||||
frag->endpoint->endpoint_ptl_id,
|
||||
OMPI_BTL_PORTALS_SEND_TABLE_ID,
|
||||
0, /* ac_index - not used*/
|
||||
frag->segment.seg_key.key64, /* match bits */
|
||||
0, /* remote offset - not used */
|
||||
frag->u.send_frag.hdr.tag); /* hdr_data - tag */
|
||||
frag->hdr.tag); /* hdr_data - tag */
|
||||
if (ret != PTL_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"PtlPut failed with error %d", ret);
|
||||
@ -81,9 +81,9 @@ mca_btl_portals_progress_queued_sends(struct mca_btl_portals_module_t *btl)
|
||||
"retransmit for frag 0x%x, 0x%x",
|
||||
frag, frag->base.des_cbfunc));
|
||||
return mca_btl_portals_send(&btl->super,
|
||||
frag->u.send_frag.endpoint,
|
||||
frag->endpoint,
|
||||
&(frag->base),
|
||||
frag->u.send_frag.hdr.tag);
|
||||
frag->hdr.tag);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user