* Put RDMA operations in the send event queue instead of the receive event queue, because it's easier to do event accounting that way.
* Greatly increase the receive event queue and buffer sizes. We're still at about half of what Cray defaults to, so I don't feel bad about the increases.
* Implement a pre-pinning optimization for eager fragments: each one is pinned on first use and left pinned for the life of the fragment.
* Since we can't have two receive frag callbacks fired at the same time, don't keep a receive free list - just keep one receive fragment in the module. This saves a big free list and all the interaction that goes with it.

This commit was SVN r9915.
Этот коммит содержится в:
родитель
db03ca0cc0
Коммит
dcc6b47fa2
@ -153,7 +153,7 @@ mca_btl_portals_add_procs(struct mca_btl_base_module_t* btl_base,
|
||||
/* fill in send memory descriptor */
|
||||
mca_btl_portals_module.md_send.start = NULL;
|
||||
mca_btl_portals_module.md_send.length = 0;
|
||||
mca_btl_portals_module.md_send.threshold = 2; /* send and ack */
|
||||
mca_btl_portals_module.md_send.threshold = PTL_MD_THRESH_INF;
|
||||
mca_btl_portals_module.md_send.max_size = 0;
|
||||
mca_btl_portals_module.md_send.options = PTL_MD_EVENT_START_DISABLE;
|
||||
mca_btl_portals_module.md_send.user_ptr = NULL;
|
||||
@ -163,9 +163,6 @@ mca_btl_portals_add_procs(struct mca_btl_base_module_t* btl_base,
|
||||
ret = OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
opal_output_verbose(50, mca_btl_portals_component.portals_output,
|
||||
"count: %d", mca_btl_portals_module.portals_num_procs);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -233,21 +230,20 @@ mca_btl_base_descriptor_t*
|
||||
mca_btl_portals_alloc(struct mca_btl_base_module_t* btl_base,
|
||||
size_t size)
|
||||
{
|
||||
mca_btl_portals_frag_t* frag;
|
||||
int rc;
|
||||
mca_btl_portals_frag_t* frag;
|
||||
|
||||
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"alloc called with size %d", size));
|
||||
|
||||
if (size <= mca_btl_portals_module.super.btl_eager_limit) {
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(&mca_btl_portals_module, frag, rc);
|
||||
if (OMPI_SUCCESS != rc) return NULL;
|
||||
frag->segments[0].seg_len =
|
||||
size <= mca_btl_portals_module.super.btl_eager_limit ?
|
||||
size : mca_btl_portals_module.super.btl_eager_limit ;
|
||||
} else {
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(&mca_btl_portals_module, frag, rc);
|
||||
if (OMPI_SUCCESS != rc) return NULL;
|
||||
frag->segments[0].seg_len =
|
||||
size <= mca_btl_portals_module.super.btl_max_send_size ?
|
||||
size : mca_btl_portals_module.super.btl_max_send_size ;
|
||||
@ -256,10 +252,6 @@ mca_btl_portals_alloc(struct mca_btl_base_module_t* btl_base,
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
/* can't setup off an alloc right now - we don't know how much the
|
||||
caller will actually use */
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
@ -272,19 +264,22 @@ mca_btl_portals_free(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
|
||||
|
||||
if (frag->md_h != PTL_INVALID_HANDLE) {
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma frag free frag 0x%x, callback 0x%x, bits %lld",
|
||||
frag, frag->base.des_cbfunc, frag->segments[0].seg_key.key64));
|
||||
PtlMDUnlink(frag->md_h);
|
||||
}
|
||||
|
||||
if (frag->size == 0) {
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
|
||||
} else if (frag->size == mca_btl_portals_module.super.btl_eager_limit){
|
||||
if (frag->size == mca_btl_portals_module.super.btl_eager_limit){
|
||||
/* don't ever unlink eager frags */
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_EAGER(&mca_btl_portals_module.super, frag);
|
||||
} else if (frag->size == mca_btl_portals_module.super.btl_max_send_size) {
|
||||
if (frag->md_h != PTL_INVALID_HANDLE) {
|
||||
PtlMDUnlink(frag->md_h);
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_MAX(&mca_btl_portals_module.super, frag);
|
||||
} else if (frag->size == 0) {
|
||||
if (frag->md_h != PTL_INVALID_HANDLE) {
|
||||
PtlMDUnlink(frag->md_h);
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
|
||||
} else {
|
||||
return OMPI_ERR_BAD_PARAM;
|
||||
}
|
||||
@ -310,77 +305,44 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"prepare_src called with size %d", *size));
|
||||
|
||||
if (0 != ompi_convertor_need_buffers(convertor)) {
|
||||
/* if we need to use buffers to pack the data, grab either an
|
||||
eager or (if we need more space) max buffer, pack the data
|
||||
into the first segment, and return */
|
||||
if (max_data+reserve <= mca_btl_portals_module.super.btl_eager_limit) {
|
||||
/*
|
||||
* if we can't send out of the buffer directly and the
|
||||
* requested size is less than the eager limit, pack into a
|
||||
* fragment from the eager pool
|
||||
*/
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(&mca_btl_portals_module, frag, ret);
|
||||
if (0 != reserve || 0 != ompi_convertor_need_buffers(convertor)) {
|
||||
frag = (mca_btl_portals_frag_t*)
|
||||
mca_btl_portals_alloc(btl_base, max_data + reserve);
|
||||
if (NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segments[0].seg_addr.pval + reserve;
|
||||
ret = ompi_convertor_pack(convertor, &iov, &iov_count,
|
||||
&max_data, &free_after);
|
||||
*size = max_data;
|
||||
if (ret < 0) {
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_EAGER(&mca_btl_portals_module, frag);
|
||||
return NULL;
|
||||
if (max_data + reserve > frag->size) {
|
||||
max_data = frag->size - reserve;
|
||||
}
|
||||
frag->segments[0].seg_len = max_data + reserve;
|
||||
frag->base.des_src_cnt = 1;
|
||||
|
||||
} else {
|
||||
/*
|
||||
* otherwise pack as much data as we can into a fragment
|
||||
* that is the max send size.
|
||||
*/
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(&mca_btl_portals_module, frag, ret);
|
||||
if (NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
if (max_data + reserve > mca_btl_portals_module.super.btl_max_send_size){
|
||||
max_data = mca_btl_portals_module.super.btl_max_send_size - reserve;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segments[0].seg_addr.pval + reserve;
|
||||
ret = ompi_convertor_pack(convertor, &iov, &iov_count,
|
||||
&max_data, &free_after);
|
||||
*size = max_data;
|
||||
if ( ret < 0 ) {
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_MAX(&mca_btl_portals_module, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segments[0].seg_len = max_data + reserve;
|
||||
frag->base.des_src_cnt = 1;
|
||||
}
|
||||
|
||||
/* clearly a send - delay setup of memory descriptor until send */
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
|
||||
|
||||
} else {
|
||||
/* no need to pack - we can send directly out of the user's
|
||||
buffer. If we have reserve space, use an eager fragment
|
||||
and give the caller the eager space as reserve. If we have
|
||||
no reserve space needs, use a user frag */
|
||||
if (0 == reserve) {
|
||||
/* no need to pack - rdma operation out of user's buffer */
|
||||
ptl_md_t md;
|
||||
ptl_handle_me_t me_h;
|
||||
|
||||
/* user frags are always setup to use only one fragment */
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
while (OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, 1) >
|
||||
mca_btl_portals_module.portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
mca_btl_portals_component_progress();
|
||||
}
|
||||
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_USER(&mca_btl_portals_module.super, frag, ret);
|
||||
if(NULL == frag){
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
@ -391,11 +353,11 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
frag->segments[0].seg_len = max_data;
|
||||
frag->segments[0].seg_addr.pval = iov.iov_base;
|
||||
frag->segments[0].seg_key.key64 = OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
|
||||
frag->segments[0].seg_key.key64 =
|
||||
OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
|
||||
frag->base.des_src_cnt = 1;
|
||||
|
||||
/* either a put or get. figure out which later */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma src posted for frag 0x%x, callback 0x%x, bits %lld",
|
||||
frag, frag->base.des_cbfunc, frag->segments[0].seg_key.key64));
|
||||
@ -413,20 +375,18 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Error creating rdma src ME: %d", ret);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* setup the memory descriptor. RDMA should never need to be
|
||||
retransmitted, so we set the threshold for the event it will
|
||||
receive (PUT/GET START and END). No need to track the unlinks
|
||||
later :) */
|
||||
/* setup the memory descriptor */
|
||||
md.start = frag->segments[0].seg_addr.pval;
|
||||
md.length = frag->segments[0].seg_len;
|
||||
md.threshold = PTL_MD_THRESH_INF;
|
||||
md.max_size = 0;
|
||||
md.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
|
||||
md.user_ptr = frag; /* keep a pointer to ourselves */
|
||||
md.eq_handle = mca_btl_portals_module.portals_eq_handles[OMPI_BTL_PORTALS_EQ];
|
||||
md.eq_handle = mca_btl_portals_module.portals_eq_handles[OMPI_BTL_PORTALS_EQ_SEND];
|
||||
|
||||
ret = PtlMDAttach(me_h,
|
||||
md,
|
||||
@ -437,39 +397,9 @@ mca_btl_portals_prepare_src(struct mca_btl_base_module_t* btl_base,
|
||||
"Error creating rdma src MD: %d", ret);
|
||||
PtlMEUnlink(me_h);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
} else {
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_EAGER(&mca_btl_portals_module, frag, ret);
|
||||
if (NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
ret = ompi_convertor_pack(convertor, &iov, &iov_count,
|
||||
&max_data, &free_after);
|
||||
|
||||
*size = max_data;
|
||||
if (ret < 0) {
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_EAGER(&mca_btl_portals_module, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segments[0].seg_len = reserve;
|
||||
frag->segments[1].seg_addr.pval = iov.iov_base;
|
||||
frag->segments[1].seg_len = max_data;
|
||||
frag->base.des_src_cnt = 2;
|
||||
|
||||
frag->iov[0].iov_base = frag->segments[0].seg_addr.pval;
|
||||
frag->iov[0].iov_len = frag->segments[0].seg_len;
|
||||
frag->iov[1].iov_base = frag->segments[1].seg_addr.pval;
|
||||
frag->iov[1].iov_len = frag->segments[1].seg_len;
|
||||
|
||||
/* clearly a send - delay setup of memory descriptor until send */
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
frag->base.des_src = frag->segments;
|
||||
@ -497,22 +427,29 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
|
||||
|
||||
/* reserve space in the event queue for rdma operations immediately */
|
||||
while (OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, 1) >
|
||||
mca_btl_portals_module.portals_max_outstanding_ops) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
mca_btl_portals_component_progress();
|
||||
}
|
||||
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_USER(&mca_btl_portals_module.super, frag, ret);
|
||||
if(NULL == frag) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ompi_ddt_type_lb(convertor->pDesc, &lb);
|
||||
frag->segments[0].seg_len = *size;
|
||||
frag->segments[0].seg_addr.pval = convertor->pBaseBuf + lb + convertor->bConverted;
|
||||
frag->segments[0].seg_key.key64 = OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
|
||||
|
||||
frag->segments[0].seg_key.key64 =
|
||||
OPAL_THREAD_ADD64(&(mca_btl_portals_module.portals_rdma_key), 1);
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->base.des_dst = frag->segments;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_flags = 0;
|
||||
frag->type = mca_btl_portals_frag_type_rdma;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"rdma dest posted for frag 0x%x, callback 0x%x, bits %lld",
|
||||
@ -530,21 +467,19 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
|
||||
if (PTL_OK != ret) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Error creating rdma dest ME: %d", ret);
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* setup the memory descriptor. RDMA should never need to be
|
||||
retransmitted, so we set the threshold for the event it will
|
||||
receive (PUT/GET START and END). No need to track the unlinks
|
||||
later :) */
|
||||
/* setup the memory descriptor. */
|
||||
md.start = frag->segments[0].seg_addr.pval;
|
||||
md.length = frag->segments[0].seg_len;
|
||||
md.threshold = PTL_MD_THRESH_INF;
|
||||
md.max_size = 0;
|
||||
md.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
|
||||
md.user_ptr = frag; /* keep a pointer to ourselves */
|
||||
md.eq_handle = mca_btl_portals_module.portals_eq_handles[OMPI_BTL_PORTALS_EQ];
|
||||
md.eq_handle = mca_btl_portals_module.portals_eq_handles[OMPI_BTL_PORTALS_EQ_SEND];
|
||||
|
||||
ret = PtlMDAttach(me_h,
|
||||
md,
|
||||
@ -554,6 +489,7 @@ mca_btl_portals_prepare_dst(struct mca_btl_base_module_t* btl_base,
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Error creating rdma dest MD: %d", ret);
|
||||
PtlMEUnlink(me_h);
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_USER(&mca_btl_portals_module.super, frag);
|
||||
return NULL;
|
||||
}
|
||||
@ -570,16 +506,10 @@ mca_btl_portals_finalize(struct mca_btl_base_module_t *btl_base)
|
||||
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
|
||||
|
||||
/* finalize all communication */
|
||||
while (mca_btl_portals_module.portals_outstanding_sends > 0) {
|
||||
while (mca_btl_portals_module.portals_outstanding_ops > 0) {
|
||||
mca_btl_portals_component_progress();
|
||||
}
|
||||
|
||||
if (0 != opal_list_get_size(&(mca_btl_portals_module.portals_queued_sends))) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Warning: there were %d queued sends not sent",
|
||||
opal_list_get_size(&(mca_btl_portals_module.portals_queued_sends)));
|
||||
}
|
||||
|
||||
if (mca_btl_portals_module.portals_num_procs != 0) {
|
||||
int i;
|
||||
|
||||
@ -597,7 +527,10 @@ mca_btl_portals_finalize(struct mca_btl_base_module_t *btl_base)
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_recv_blocks);
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_queued_sends);
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_recv_frag);
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_eager);
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_max);
|
||||
OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_user);
|
||||
|
||||
if (PTL_INVALID_HANDLE != mca_btl_portals_module.portals_ni_h) {
|
||||
ret = PtlNIFini(mca_btl_portals_module.portals_ni_h);
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "orte/class/orte_proc_table.h"
|
||||
|
||||
#include "btl_portals_endpoint.h"
|
||||
#include "btl_portals_frag.h"
|
||||
|
||||
#define OMPI_BTL_PORTALS_SEND_TABLE_ID (OMPI_BTL_PORTALS_STARTING_TABLE_ID + 0)
|
||||
#define OMPI_BTL_PORTALS_RDMA_TABLE_ID (OMPI_BTL_PORTALS_STARTING_TABLE_ID + 1)
|
||||
@ -66,12 +67,15 @@ struct mca_btl_portals_component_t {
|
||||
int portals_free_list_max_num;
|
||||
/* number of elements to grow free lists */
|
||||
int portals_free_list_inc_num;
|
||||
|
||||
/* number of eager fragments */
|
||||
int portals_free_list_eager_max_num;
|
||||
};
|
||||
typedef struct mca_btl_portals_component_t mca_btl_portals_component_t;
|
||||
|
||||
|
||||
#define OMPI_BTL_PORTALS_EQ_SEND 0
|
||||
#define OMPI_BTL_PORTALS_EQ 1
|
||||
#define OMPI_BTL_PORTALS_EQ_RECV 1
|
||||
#define OMPI_BTL_PORTALS_EQ_SIZE 2
|
||||
|
||||
struct mca_btl_portals_module_t {
|
||||
@ -89,13 +93,15 @@ struct mca_btl_portals_module_t {
|
||||
ompi_free_list_t portals_frag_eager;
|
||||
ompi_free_list_t portals_frag_max;
|
||||
ompi_free_list_t portals_frag_user;
|
||||
ompi_free_list_t portals_frag_recv;
|
||||
|
||||
/* incoming send message receive memory descriptors */
|
||||
int portals_recv_mds_num;
|
||||
int portals_recv_mds_size;
|
||||
opal_list_t portals_recv_blocks;
|
||||
|
||||
/* frag for receive callbacks */
|
||||
mca_btl_portals_frag_recv_t portals_recv_frag;
|
||||
|
||||
/* event queues. Keep sends on own eq, since we can't control
|
||||
space for the ack otherwise */
|
||||
int portals_eq_sizes[OMPI_BTL_PORTALS_EQ_SIZE];
|
||||
@ -104,11 +110,11 @@ struct mca_btl_portals_module_t {
|
||||
/* "reject" entry for recv match list */
|
||||
ptl_handle_me_t portals_recv_reject_me_h;
|
||||
|
||||
/* number outstanding sends */
|
||||
volatile int32_t portals_outstanding_sends;
|
||||
int32_t portals_max_outstanding_sends;
|
||||
/* number outstanding sends and local rdma */
|
||||
volatile int32_t portals_outstanding_ops;
|
||||
int32_t portals_max_outstanding_ops;
|
||||
|
||||
/* queued sends */
|
||||
/* sends queued until there's time to send */
|
||||
opal_list_t portals_queued_sends;
|
||||
|
||||
/* key to use for next rdma operation */
|
||||
|
@ -119,7 +119,7 @@ mca_btl_portals_add_procs_compat(struct mca_btl_portals_module_t* btl,
|
||||
int ret;
|
||||
|
||||
if (use_modex) {
|
||||
int my_rid;
|
||||
int my_rid = 0;
|
||||
ptl_process_id_t *info;
|
||||
char *nidmap = NULL;
|
||||
char *pidmap = NULL;
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
#if OMPI_BTL_PORTALS_REDSTORM
|
||||
#include <catamount/cnos_mpi_os.h>
|
||||
#endif
|
||||
@ -91,11 +92,11 @@ mca_btl_portals_component_open(void)
|
||||
"Debugging verbosity (0 - 100)",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_DEBUG_LEVEL,
|
||||
0,
|
||||
&(portals_output_stream.lds_verbose_level));
|
||||
#if OMPI_BTL_PORTALS_REDSTORM
|
||||
asprintf(&(portals_output_stream.lds_prefix),
|
||||
"btl: portals (%2d): ", cnos_get_rank());
|
||||
"btl: portals (%5d): ", cnos_get_rank());
|
||||
#else
|
||||
asprintf(&(portals_output_stream.lds_prefix),
|
||||
"btl: portals (%5d): ", getpid());
|
||||
@ -118,22 +119,29 @@ mca_btl_portals_component_open(void)
|
||||
"Initial number of elements to initialize in free lists",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_FREE_LIST_INIT_NUM,
|
||||
16,
|
||||
&(mca_btl_portals_component.portals_free_list_init_num));
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
"free_list_max_num",
|
||||
"Max number of elements to initialize in free lists",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_FREE_LIST_MAX_NUM,
|
||||
1024,
|
||||
&(mca_btl_portals_component.portals_free_list_max_num));
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
"free_list_inc_num",
|
||||
"Increment count for free lists",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_FREE_LIST_INC_NUM,
|
||||
16,
|
||||
&(mca_btl_portals_component.portals_free_list_inc_num));
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
"eager_frag_limit",
|
||||
"Maximum number of pre-pinned eager fragments",
|
||||
false,
|
||||
false,
|
||||
32,
|
||||
&(mca_btl_portals_component.portals_free_list_eager_max_num));
|
||||
|
||||
/*
|
||||
* fill default module state
|
||||
@ -143,7 +151,7 @@ mca_btl_portals_component_open(void)
|
||||
"Maximum size for eager frag",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_EAGER_LIMIT,
|
||||
32 * 1024,
|
||||
&dummy);
|
||||
mca_btl_portals_module.super.btl_eager_limit = dummy;
|
||||
|
||||
@ -152,7 +160,7 @@ mca_btl_portals_component_open(void)
|
||||
"Minimum size for a send frag",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_MIN_SEND_SIZE,
|
||||
32 * 1024,
|
||||
&dummy);
|
||||
mca_btl_portals_module.super.btl_min_send_size = dummy;
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
@ -160,7 +168,7 @@ mca_btl_portals_component_open(void)
|
||||
"Maximum size for a send frag",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_MAX_SEND_SIZE,
|
||||
64 * 1024,
|
||||
&dummy);
|
||||
mca_btl_portals_module.super.btl_max_send_size = dummy;
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
@ -168,7 +176,7 @@ mca_btl_portals_component_open(void)
|
||||
"Minimum size for a rdma frag",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_MIN_RDMA_SIZE,
|
||||
64 * 1024,
|
||||
&dummy);
|
||||
mca_btl_portals_module.super.btl_min_rdma_size = dummy;
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
@ -176,7 +184,7 @@ mca_btl_portals_component_open(void)
|
||||
"Maximum size for a rdma frag",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_MAX_RDMA_SIZE,
|
||||
INT_MAX,
|
||||
&dummy);
|
||||
mca_btl_portals_module.super.btl_max_rdma_size = dummy;
|
||||
|
||||
@ -205,11 +213,10 @@ mca_btl_portals_component_open(void)
|
||||
&dummy);
|
||||
mca_btl_portals_module.super.btl_bandwidth = dummy;
|
||||
|
||||
#if 0 /* it appears that copying is faster than iovecs at present */
|
||||
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
|
||||
#else
|
||||
/* send in place actually increases our latency because we have to
|
||||
hold on to the buffer until we're done with it, rather than
|
||||
copy and send. So don't use it for now. */
|
||||
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_RDMA;
|
||||
#endif
|
||||
|
||||
mca_btl_portals_module.portals_num_procs = 0;
|
||||
bzero(&(mca_btl_portals_module.portals_reg),
|
||||
@ -222,23 +229,23 @@ mca_btl_portals_component_open(void)
|
||||
/* eq handles will be created when the module is instantiated.
|
||||
Set sizes here */
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
"eq_size",
|
||||
"Size of the event queue",
|
||||
"eq_recv_size",
|
||||
"Size of the receive event queue",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_RECV_QUEUE_SIZE,
|
||||
&(mca_btl_portals_module.portals_eq_sizes[OMPI_BTL_PORTALS_EQ]));
|
||||
16 * 1024,
|
||||
&(mca_btl_portals_module.portals_eq_sizes[OMPI_BTL_PORTALS_EQ_RECV]));
|
||||
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
"eq_send_max_pending",
|
||||
"Maximum number of pending send frags",
|
||||
"max_pending_ops",
|
||||
"Maximum number of pending send/rdma frags",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_MAX_SENDS_PENDING,
|
||||
&(mca_btl_portals_module.portals_max_outstanding_sends));
|
||||
/* sends_pending * 2 for end, ack */
|
||||
8 * 1024,
|
||||
&(mca_btl_portals_module.portals_max_outstanding_ops));
|
||||
/* ops_pending * 2 for end, ack */
|
||||
mca_btl_portals_module.portals_eq_sizes[OMPI_BTL_PORTALS_EQ_SEND] =
|
||||
mca_btl_portals_module.portals_max_outstanding_sends * 2;
|
||||
mca_btl_portals_module.portals_max_outstanding_ops * 2;
|
||||
|
||||
mca_btl_portals_module.portals_recv_reject_me_h = PTL_INVALID_HANDLE;
|
||||
|
||||
@ -247,19 +254,19 @@ mca_btl_portals_component_open(void)
|
||||
"Number of send frag receive descriptors",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_RECV_MD_NUM,
|
||||
3,
|
||||
&(mca_btl_portals_module.portals_recv_mds_num));
|
||||
mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version,
|
||||
"recv_md_size",
|
||||
"Size of send frag receive descriptors",
|
||||
false,
|
||||
false,
|
||||
OMPI_BTL_PORTALS_DEFAULT_RECV_MD_SIZE,
|
||||
10 * 1024 * 1024,
|
||||
&(mca_btl_portals_module.portals_recv_mds_size));
|
||||
|
||||
mca_btl_portals_module.portals_ni_h = PTL_INVALID_HANDLE;
|
||||
mca_btl_portals_module.portals_sr_dropped = 0;
|
||||
mca_btl_portals_module.portals_outstanding_sends = 0;
|
||||
mca_btl_portals_module.portals_outstanding_ops = 0;
|
||||
mca_btl_portals_module.portals_rdma_key = 1;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -315,7 +322,6 @@ mca_btl_portals_component_init(int *num_btls,
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_frag_eager), ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_frag_max), ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_frag_user), ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_frag_recv), ompi_free_list_t);
|
||||
|
||||
/* eager frags */
|
||||
ompi_free_list_init(&(mca_btl_portals_module.portals_frag_eager),
|
||||
@ -323,7 +329,7 @@ mca_btl_portals_component_init(int *num_btls,
|
||||
mca_btl_portals_module.super.btl_eager_limit,
|
||||
OBJ_CLASS(mca_btl_portals_frag_eager_t),
|
||||
mca_btl_portals_component.portals_free_list_init_num,
|
||||
mca_btl_portals_component.portals_free_list_max_num,
|
||||
mca_btl_portals_component.portals_free_list_eager_max_num,
|
||||
mca_btl_portals_component.portals_free_list_inc_num,
|
||||
NULL);
|
||||
|
||||
@ -347,19 +353,16 @@ mca_btl_portals_component_init(int *num_btls,
|
||||
NULL);
|
||||
|
||||
/* recv frags */
|
||||
ompi_free_list_init(&(mca_btl_portals_module.portals_frag_recv),
|
||||
sizeof(mca_btl_portals_frag_recv_t),
|
||||
OBJ_CLASS(mca_btl_portals_frag_recv_t),
|
||||
mca_btl_portals_component.portals_free_list_init_num,
|
||||
mca_btl_portals_component.portals_free_list_max_num,
|
||||
mca_btl_portals_component.portals_free_list_inc_num,
|
||||
NULL);
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_recv_frag),
|
||||
mca_btl_portals_frag_recv_t);
|
||||
|
||||
/* receive block list */
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_recv_blocks), opal_list_t);
|
||||
|
||||
/* pending sends */
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_queued_sends), opal_list_t);
|
||||
/* list for send requests that have to be delayed */
|
||||
OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_queued_sends),
|
||||
opal_list_t);
|
||||
|
||||
*num_btls = 1;
|
||||
|
||||
opal_output_verbose(20, mca_btl_portals_component.portals_output,
|
||||
@ -386,18 +389,9 @@ mca_btl_portals_component_progress(void)
|
||||
while (true) {
|
||||
ret = PtlEQPoll(mca_btl_portals_module.portals_eq_handles,
|
||||
OMPI_BTL_PORTALS_EQ_SIZE,
|
||||
#if OMPI_BTL_PORTALS_REDSTORM
|
||||
0, /* timeout */
|
||||
#else
|
||||
/* with a timeout of 0, the reference
|
||||
implementation seems to get really unhappy
|
||||
really fast when communication starts between
|
||||
all peers at the same time. Slowing things
|
||||
down a bit seems to help a bunch. */
|
||||
1, /* timeout */
|
||||
#endif
|
||||
&ev,
|
||||
&which);
|
||||
&ev, /* event structure to update */
|
||||
&which); /* which queue the event came from - we don't care */
|
||||
switch (ret) {
|
||||
case PTL_OK:
|
||||
frag = ev.md.user_ptr;
|
||||
@ -406,7 +400,6 @@ mca_btl_portals_component_progress(void)
|
||||
switch (ev.type) {
|
||||
case PTL_EVENT_GET_START:
|
||||
/* generated on source (target) when a get from memory starts */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_GET_START for 0x%x, %d",
|
||||
frag, (int) ev.hdr_data));
|
||||
@ -415,7 +408,6 @@ mca_btl_portals_component_progress(void)
|
||||
|
||||
case PTL_EVENT_GET_END:
|
||||
/* generated on source (target) when a get from memory ends */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_GET_END for 0x%x, %d",
|
||||
frag, (int) ev.hdr_data));
|
||||
@ -424,7 +416,6 @@ mca_btl_portals_component_progress(void)
|
||||
|
||||
case PTL_EVENT_PUT_START:
|
||||
/* generated on destination (target) when a put into memory starts */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_PUT_START for 0x%x, %d",
|
||||
frag, (int) ev.hdr_data));
|
||||
@ -446,7 +437,6 @@ mca_btl_portals_component_progress(void)
|
||||
|
||||
case PTL_EVENT_PUT_END:
|
||||
/* generated on destination (target) when a put into memory ends */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_PUT_END for 0x%x, %d",
|
||||
frag, (int) ev.hdr_data));
|
||||
@ -465,13 +455,15 @@ mca_btl_portals_component_progress(void)
|
||||
block = ev.md.user_ptr;
|
||||
tag = ev.hdr_data;
|
||||
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_RECV(&mca_btl_portals_module, frag, ret);
|
||||
/* if we ever make this thread hot, need to do
|
||||
something with the receive fragments */
|
||||
frag = &mca_btl_portals_module.portals_recv_frag;
|
||||
frag->segments[0].seg_addr.pval = (((char*) ev.md.start) + ev.offset);
|
||||
frag->segments[0].seg_len = ev.mlength;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"received send fragment %x (thresh: %d)",
|
||||
frag, ev.md.threshold));
|
||||
"received send fragment %x (thresh: %d, length %d)",
|
||||
frag, ev.md.threshold, (int) ev.mlength));
|
||||
|
||||
if (ev.md.length - (ev.offset + ev.mlength) < ev.md.max_size ||
|
||||
ev.md.threshold == 1) {
|
||||
@ -491,8 +483,6 @@ mca_btl_portals_component_progress(void)
|
||||
tag,
|
||||
&frag->base,
|
||||
mca_btl_portals_module.portals_reg[tag].cbdata);
|
||||
OMPI_BTL_PORTALS_FRAG_RETURN_RECV(&mca_btl_portals_module.super,
|
||||
frag);
|
||||
mca_btl_portals_return_block_part(&mca_btl_portals_module, block);
|
||||
}
|
||||
break;
|
||||
@ -502,8 +492,8 @@ mca_btl_portals_component_progress(void)
|
||||
returning data */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_REPLY_START for 0x%x, %d, %d",
|
||||
frag, (int) frag->type, (int) ev.hdr_data));
|
||||
"PTL_EVENT_REPLY_START for 0x%x, %d",
|
||||
frag, (int) ev.hdr_data));
|
||||
|
||||
break;
|
||||
|
||||
@ -512,8 +502,7 @@ mca_btl_portals_component_progress(void)
|
||||
done returning data */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_REPLY_END for 0x%x, %d",
|
||||
frag, (int) frag->type));
|
||||
"PTL_EVENT_REPLY_END for 0x%x", frag));
|
||||
|
||||
/* let the PML know we're done */
|
||||
frag->base.des_cbfunc(&mca_btl_portals_module.super,
|
||||
@ -528,18 +517,12 @@ mca_btl_portals_component_progress(void)
|
||||
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
OPAL_OUTPUT_VERBOSE((900, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_SEND_START for 0x%x, %d, %d",
|
||||
frag, (int) frag->type, (int) ev.hdr_data));
|
||||
"PTL_EVENT_SEND_START for 0x%x, %d",
|
||||
frag, (int) ev.hdr_data));
|
||||
|
||||
if (ev.ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to start send event\n");
|
||||
if (ev.hdr_data < MCA_BTL_TAG_MAX) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_sends,
|
||||
-1);
|
||||
/* unlink, since we don't expect to get an end or ack */
|
||||
}
|
||||
PtlMDUnlink(ev.md_handle);
|
||||
frag->base.des_cbfunc(&mca_btl_portals_module.super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
@ -552,18 +535,12 @@ mca_btl_portals_component_progress(void)
|
||||
/* generated on source (origin) when put stops sending */
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_SEND_END for 0x%x, %d, %d",
|
||||
frag, (int) frag->type, (int) ev.hdr_data));
|
||||
"PTL_EVENT_SEND_END for 0x%x, %d",
|
||||
frag, (int) ev.hdr_data));
|
||||
|
||||
if (ev.ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to end send event\n");
|
||||
if (ev.hdr_data < MCA_BTL_TAG_MAX) {
|
||||
/* unlink, since we don't expect to get an ack */
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_sends,
|
||||
-1);
|
||||
PtlMDUnlink(ev.md_handle);
|
||||
}
|
||||
frag->base.des_cbfunc(&mca_btl_portals_module.super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
@ -580,20 +557,12 @@ mca_btl_portals_component_progress(void)
|
||||
Requeue the put on badness */
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PTL_EVENT_ACK for 0x%x, %d",
|
||||
frag, (int) frag->type));
|
||||
|
||||
if (frag->type == mca_btl_portals_frag_type_send) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_sends,
|
||||
-1);
|
||||
}
|
||||
"PTL_EVENT_ACK for 0x%x", frag));
|
||||
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
if (ev.ni_fail_type != PTL_NI_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"Failure to ack event\n");
|
||||
/* unlink, since we don't expect to get an ack */
|
||||
PtlMDUnlink(ev.md_handle);
|
||||
frag->base.des_cbfunc(&mca_btl_portals_module.super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
@ -608,14 +577,15 @@ mca_btl_portals_component_progress(void)
|
||||
buffer space available for receiving */
|
||||
opal_output_verbose(50,
|
||||
mca_btl_portals_component.portals_output,
|
||||
"message was dropped. Adding to front of queue list");
|
||||
opal_list_prepend(&(mca_btl_portals_module.portals_queued_sends),
|
||||
(opal_list_item_t*) frag);
|
||||
"message was dropped. Trying again");
|
||||
|
||||
mca_btl_portals_send(&mca_btl_portals_module.super,
|
||||
frag->endpoint,
|
||||
&frag->base,
|
||||
frag->hdr.tag);
|
||||
} else {
|
||||
/* other side received the message. should have
|
||||
received entire thing */
|
||||
|
||||
/* let the PML know we're done */
|
||||
frag->base.des_cbfunc(&mca_btl_portals_module.super,
|
||||
frag->endpoint,
|
||||
@ -623,7 +593,11 @@ mca_btl_portals_component_progress(void)
|
||||
OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
if (frag->type == mca_btl_portals_frag_type_send) {
|
||||
opal_output_verbose(50, mca_btl_portals_component.portals_output, "fuck");
|
||||
|
||||
if (0 != frag->size) {
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops,
|
||||
-1);
|
||||
MCA_BTL_PORTALS_PROGRESS_QUEUED_SENDS();
|
||||
}
|
||||
|
||||
|
@ -34,9 +34,7 @@ mca_btl_portals_frag_common_send_constructor(mca_btl_portals_frag_t* frag)
|
||||
frag->segments[0].seg_len = frag->size;
|
||||
frag->segments[0].seg_key.key64 = 0;
|
||||
|
||||
frag->segments[1].seg_addr.pval = 0;
|
||||
frag->segments[1].seg_len = 0;
|
||||
frag->segments[1].seg_key.key64 = 0;
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
|
||||
@ -48,6 +46,16 @@ mca_btl_portals_frag_eager_constructor(mca_btl_portals_frag_t* frag)
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
mca_btl_portals_frag_eager_destructor(mca_btl_portals_frag_t* frag)
|
||||
{
|
||||
if (PTL_INVALID_HANDLE == frag->md_h) {
|
||||
PtlMDUnlink(frag->md_h);
|
||||
frag->md_h = PTL_INVALID_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
mca_btl_portals_frag_max_constructor(mca_btl_portals_frag_t* frag)
|
||||
{
|
||||
@ -76,7 +84,6 @@ mca_btl_portals_frag_recv_constructor(mca_btl_portals_frag_t* frag)
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->size = 0;
|
||||
frag->type = mca_btl_portals_frag_type_recv;
|
||||
}
|
||||
|
||||
|
||||
@ -90,7 +97,7 @@ OBJ_CLASS_INSTANCE(
|
||||
mca_btl_portals_frag_eager_t,
|
||||
mca_btl_base_descriptor_t,
|
||||
mca_btl_portals_frag_eager_constructor,
|
||||
NULL);
|
||||
mca_btl_portals_frag_eager_destructor);
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_btl_portals_frag_max_t,
|
||||
|
@ -30,16 +30,15 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_portals_frag_t);
|
||||
*/
|
||||
/* NOTE(review): this listing interleaves the pre- and post-commit lines
   of a diff, so some members below appear in both their old and new
   form (segments[2] + iov[2] versus segments[1]); the real header
   contains only one of them -- confirm against the checked-in file. */
struct mca_btl_portals_frag_t {
|
||||
/* generic BTL descriptor; callers cast a descriptor pointer straight
   to a fragment pointer, so this has to remain the first member */
mca_btl_base_descriptor_t base;
|
||||
/* segment descriptors; segments[0] carries the address and length of
   the fragment's data */
mca_btl_base_segment_t segments[2];
|
||||
/* io vector handed to Portals when a send has two source segments */
ptl_md_iovec_t iov[2];
|
||||
mca_btl_base_segment_t segments[1];
|
||||
/* needed for retransmit case */
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
/* needed for retransmit case */
|
||||
mca_btl_base_header_t hdr;
|
||||
/* kind of operation this fragment is currently used for */
enum { mca_btl_portals_frag_type_send,
|
||||
mca_btl_portals_frag_type_recv,
|
||||
mca_btl_portals_frag_type_rdma} type;
|
||||
/* handle to use for communication */
|
||||
ptl_handle_md_t md_h;
|
||||
/* size of the allocated memory region -- not the amount of data
|
||||
we need to send */
|
||||
size_t size;
|
||||
|
||||
};
|
||||
@ -59,6 +58,7 @@ OBJ_CLASS_DECLARATION(mca_btl_portals_frag_user_t);
|
||||
typedef struct mca_btl_portals_frag_t mca_btl_portals_frag_recv_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_portals_frag_recv_t);
|
||||
|
||||
|
||||
/*
|
||||
* Macros to allocate/return descriptors from module specific
|
||||
* free list(s).
|
||||
@ -67,10 +67,14 @@ OBJ_CLASS_DECLARATION(mca_btl_portals_frag_recv_t);
|
||||
{ \
|
||||
\
|
||||
opal_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_eager, item, rc); \
|
||||
OMPI_FREE_LIST_GET(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_eager, item, rc); \
|
||||
if (rc == OMPI_ERR_TEMP_OUT_OF_RESOURCE) { \
|
||||
OMPI_BTL_PORTALS_FRAG_ALLOC_MAX(btl_macro, frag, rc); \
|
||||
} \
|
||||
frag = (mca_btl_portals_frag_t*) item; \
|
||||
}
|
||||
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_EAGER(btl_macro, frag) \
|
||||
{ \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_eager, \
|
||||
@ -86,6 +90,7 @@ OBJ_CLASS_DECLARATION(mca_btl_portals_frag_recv_t);
|
||||
frag = (mca_btl_portals_frag_t*) item; \
|
||||
}
|
||||
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_MAX(btl_macro, frag) \
|
||||
{ \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_max, \
|
||||
@ -100,6 +105,7 @@ OBJ_CLASS_DECLARATION(mca_btl_portals_frag_recv_t);
|
||||
frag = (mca_btl_portals_frag_t*) item; \
|
||||
}
|
||||
|
||||
|
||||
#define OMPI_BTL_PORTALS_FRAG_RETURN_USER(btl_macro, frag) \
|
||||
{ \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_user, \
|
||||
@ -107,21 +113,6 @@ OBJ_CLASS_DECLARATION(mca_btl_portals_frag_recv_t);
|
||||
}
|
||||
|
||||
|
||||
/* Obtain a receive fragment from the module's portals_frag_recv free
   list.  btl_macro is cast to mca_btl_portals_module_t*, the list item
   produced by OMPI_FREE_LIST_WAIT is stored in frag, and rc is passed
   through to OMPI_FREE_LIST_WAIT (presumably it receives the status
   code -- confirm against the free list header). */
#define OMPI_BTL_PORTALS_FRAG_ALLOC_RECV(btl_macro, frag, rc) \
|
||||
{ \
|
||||
opal_list_item_t *item; \
|
||||
OMPI_FREE_LIST_WAIT(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_recv, item, rc); \
|
||||
frag = (mca_btl_portals_frag_t*) item; \
|
||||
}
|
||||
|
||||
/* Hand a receive fragment back to the module's portals_frag_recv free
   list.  btl_macro is cast to mca_btl_portals_module_t* and frag is
   passed to OMPI_FREE_LIST_RETURN as an opal_list_item_t*. */
#define OMPI_BTL_PORTALS_FRAG_RETURN_RECV(btl_macro, frag) \
|
||||
{ \
|
||||
OMPI_FREE_LIST_RETURN(&((mca_btl_portals_module_t*)btl_macro)->portals_frag_recv, \
|
||||
(opal_list_item_t*)(frag)); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@ -42,7 +42,6 @@ mca_btl_portals_put(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
frag->endpoint = btl_peer;
|
||||
frag->hdr.tag = MCA_BTL_TAG_MAX;
|
||||
frag->type = mca_btl_portals_frag_type_rdma;
|
||||
|
||||
/* setup the send */
|
||||
assert(1 == frag->base.des_src_cnt);
|
||||
@ -58,7 +57,6 @@ mca_btl_portals_put(struct mca_btl_base_module_t* btl_base,
|
||||
if (ret != PTL_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"PtlPut failed with error %d", ret);
|
||||
PtlMDUnlink(frag->md_h);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -83,7 +81,6 @@ mca_btl_portals_get(struct mca_btl_base_module_t* btl_base,
|
||||
|
||||
frag->endpoint = btl_peer;
|
||||
frag->hdr.tag = MCA_BTL_TAG_MAX;
|
||||
frag->type = mca_btl_portals_frag_type_rdma;
|
||||
|
||||
ret = PtlGet(frag->md_h,
|
||||
*((mca_btl_base_endpoint_t*) btl_peer),
|
||||
@ -94,7 +91,6 @@ mca_btl_portals_get(struct mca_btl_base_module_t* btl_base,
|
||||
if (ret != PTL_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"PtlGet failed with error %d", ret);
|
||||
PtlMDUnlink(frag->md_h);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
@ -96,12 +96,12 @@ mca_btl_portals_activate_block(mca_btl_portals_recv_block_t *block)
|
||||
md.length = block->length;
|
||||
/* try to throttle incoming sends so that we don't overrun the incoming
|
||||
queue size */
|
||||
md.threshold = mca_btl_portals_module.portals_eq_sizes[OMPI_BTL_PORTALS_EQ] /
|
||||
md.threshold = mca_btl_portals_module.portals_eq_sizes[OMPI_BTL_PORTALS_EQ_RECV] /
|
||||
(mca_btl_portals_module.portals_recv_mds_num * 2);
|
||||
md.max_size = block->btl->super.btl_max_send_size;
|
||||
md.options = PTL_MD_OP_PUT | PTL_MD_MAX_SIZE;
|
||||
md.user_ptr = block;
|
||||
md.eq_handle = block->btl->portals_eq_handles[OMPI_BTL_PORTALS_EQ];
|
||||
md.eq_handle = block->btl->portals_eq_handles[OMPI_BTL_PORTALS_EQ_RECV];
|
||||
|
||||
block->pending = 0;
|
||||
block->full = false;
|
||||
|
@ -35,95 +35,70 @@ mca_btl_portals_send(struct mca_btl_base_module_t* btl_base,
|
||||
mca_btl_base_tag_t tag)
|
||||
{
|
||||
mca_btl_portals_frag_t *frag = (mca_btl_portals_frag_t*) descriptor;
|
||||
int32_t num_sends;
|
||||
int ret;
|
||||
|
||||
assert(&mca_btl_portals_module == (mca_btl_portals_module_t*) btl_base);
|
||||
assert(frag->md_h == PTL_INVALID_HANDLE);
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
frag->hdr.tag = tag;
|
||||
frag->type = mca_btl_portals_frag_type_send;
|
||||
|
||||
num_sends = OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_sends, 1);
|
||||
|
||||
/* make sure that we have enough space to send. This means that
|
||||
there is enough space in the event queue for all the events
|
||||
that may be deposited by outstanding sends */
|
||||
if (num_sends >= mca_btl_portals_module.portals_max_outstanding_sends) {
|
||||
opal_output_verbose(50, mca_btl_portals_component.portals_output,
|
||||
"no space for message 0x%x. Adding to back of queue",
|
||||
frag);
|
||||
opal_list_append(&(mca_btl_portals_module.portals_queued_sends),
|
||||
(opal_list_item_t*) frag);
|
||||
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_sends, -1);
|
||||
|
||||
ret = OMPI_SUCCESS;
|
||||
} else {
|
||||
int ret;
|
||||
ptl_handle_md_t md_h;
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"PtlPut (send) fragment %x", frag));
|
||||
|
||||
/* setup the send */
|
||||
if (1 == frag->base.des_src_cnt) {
|
||||
mca_btl_portals_module.md_send.start = frag->segments[0].seg_addr.pval;
|
||||
mca_btl_portals_module.md_send.length = frag->segments[0].seg_len;
|
||||
mca_btl_portals_module.md_send.options = PTL_MD_EVENT_START_DISABLE;
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"fragment info:\n"
|
||||
"\tstart: 0x%x\n"
|
||||
"\tlen: %d",
|
||||
frag->segments[0].seg_addr.pval,
|
||||
frag->segments[0].seg_len));
|
||||
} else {
|
||||
assert(2 == frag->base.des_src_cnt);
|
||||
mca_btl_portals_module.md_send.start = frag->iov;
|
||||
mca_btl_portals_module.md_send.length = 2;
|
||||
mca_btl_portals_module.md_send.options =
|
||||
PTL_MD_EVENT_START_DISABLE | PTL_MD_IOVEC;
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"fragment info:\n"
|
||||
"\tiov[0].iov_base: 0x%x\n"
|
||||
"\tiov[0].iov_len: %d\n"
|
||||
"\tiov[1].iov_base: 0x%x\n"
|
||||
"\tiov[1].iov_len: %d",
|
||||
frag->iov[0].iov_base,
|
||||
frag->iov[0].iov_len,
|
||||
frag->iov[1].iov_base,
|
||||
frag->iov[1].iov_len));
|
||||
if (OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, 1) >
|
||||
mca_btl_portals_module.portals_max_outstanding_ops) {
|
||||
/* no space - queue and continute */
|
||||
opal_output_verbose(50, mca_btl_portals_component.portals_output,
|
||||
"no space for message 0x%x. Adding to back of queue",
|
||||
frag);
|
||||
OPAL_THREAD_ADD32(&mca_btl_portals_module.portals_outstanding_ops, -1);
|
||||
opal_list_append(&(mca_btl_portals_module.portals_queued_sends),
|
||||
(opal_list_item_t*) frag);
|
||||
}
|
||||
|
||||
if (frag->md_h == PTL_INVALID_HANDLE) {
|
||||
/* setup the send - always describe entire fragment */
|
||||
mca_btl_portals_module.md_send.start = frag->segments[0].seg_addr.pval;
|
||||
mca_btl_portals_module.md_send.length =
|
||||
0 == frag->size ? frag->segments[0].seg_len : frag->size;
|
||||
mca_btl_portals_module.md_send.options =
|
||||
PTL_MD_EVENT_START_DISABLE;
|
||||
mca_btl_portals_module.md_send.user_ptr = frag; /* keep a pointer to ourselves */
|
||||
|
||||
/* make a free-floater */
|
||||
ret = PtlMDBind(mca_btl_portals_module.portals_ni_h,
|
||||
mca_btl_portals_module.md_send,
|
||||
PTL_UNLINK,
|
||||
&md_h);
|
||||
&frag->md_h);
|
||||
if (ret != PTL_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"PtlMDBind failed with error %d", ret);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
ret = PtlPut(md_h,
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output,
|
||||
"fragment info:\n"
|
||||
"\tstart: 0x%x\n"
|
||||
"\tlen: %d",
|
||||
frag->segments[0].seg_addr.pval,
|
||||
frag->segments[0].seg_len));
|
||||
|
||||
ret = PtlPutRegion(frag->md_h, /* memory descriptor */
|
||||
0, /* fragment offset */
|
||||
frag->segments[0].seg_len, /* fragment length */
|
||||
PTL_ACK_REQ,
|
||||
*((mca_btl_base_endpoint_t*) endpoint),
|
||||
OMPI_BTL_PORTALS_SEND_TABLE_ID,
|
||||
0, /* ac_index - not used */
|
||||
0, /* match bits */
|
||||
0, /* remote offset - not used */
|
||||
frag->hdr.tag); /* hdr_data - tag */
|
||||
frag->hdr.tag); /* hdr_data: tag */
|
||||
if (ret != PTL_OK) {
|
||||
opal_output(mca_btl_portals_component.portals_output,
|
||||
"send: PtlPut failed with error %d", ret);
|
||||
PtlMDUnlink(md_h);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -24,8 +24,8 @@
|
||||
|
||||
#define MCA_BTL_PORTALS_PROGRESS_QUEUED_SENDS() \
|
||||
if ((0 != opal_list_get_size(&(mca_btl_portals_module.portals_queued_sends))) && \
|
||||
(mca_btl_portals_module.portals_outstanding_sends < \
|
||||
mca_btl_portals_module.portals_max_outstanding_sends)) { \
|
||||
(mca_btl_portals_module.portals_outstanding_ops < \
|
||||
mca_btl_portals_module.portals_max_outstanding_ops)) { \
|
||||
mca_btl_portals_frag_t *qfrag = (mca_btl_portals_frag_t*) \
|
||||
opal_list_remove_first(&(mca_btl_portals_module.portals_queued_sends)); \
|
||||
OPAL_OUTPUT_VERBOSE((90, mca_btl_portals_component.portals_output, \
|
||||
|
@ -17,80 +17,6 @@
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
# MCA_btl_portals_CONFIG_VAL(config_name, define_name,
|
||||
# default_val, description)
|
||||
# -----------------------------------------------------
# Registers a --with-portals-CONFIG_NAME configure option whose help
# text is DESCRIPTION followed by the default.  The shell variable
# DEFINE_NAME is set to DEFAULT_VAL when the option is not given and
# to the option's value otherwise; --without-portals-CONFIG_NAME is
# rejected with an error.  The result is then emitted as a
# preprocessor define named DEFINE_NAME.  Dashes in CONFIG_NAME are
# mapped to underscores to form the with_portals_* shell variable.
|
||||
AC_DEFUN([MCA_btl_portals_CONFIG_VAL], [
|
||||
AC_ARG_WITH([portals-$1], AC_HELP_STRING([--with-portals-$1],
|
||||
[$4 (default: $3)]))
|
||||
case "[$with_]m4_bpatsubst([portals-$1], -, _)" in
|
||||
"")
|
||||
$2=$3
|
||||
;;
|
||||
"no")
|
||||
AC_MSG_ERROR([--without-portals-$1 is invalid argument])
|
||||
;;
|
||||
*)
|
||||
$2="[$with_]m4_bpatsubst([portals-$1], -, _)"
|
||||
;;
|
||||
esac
|
||||
AC_DEFINE_UNQUOTED([$2], [[$]$2], [$4])
|
||||
])
|
||||
|
||||
|
||||
# MCA_btl_portals_CONFIG_VALS()
# ------------------------------
# Registers every user-tunable default of the portals BTL, one
# --with-portals-NAME option per value, by calling
# MCA_btl_portals_CONFIG_VAL for each.
#
# The number of receive memory descriptors is registered as "md-num".
# It used to reuse the name "md-size", which made
# --with-portals-md-size set both the descriptor size and the
# descriptor count and left the count without an option of its own.
AC_DEFUN([MCA_btl_portals_CONFIG_VALS], [
    # User configuration options
    MCA_btl_portals_CONFIG_VAL([debug-level],
        [OMPI_BTL_PORTALS_DEFAULT_DEBUG_LEVEL], [0],
        [debugging level for portals btl])

    MCA_btl_portals_CONFIG_VAL([eager-limit],
        [OMPI_BTL_PORTALS_DEFAULT_EAGER_LIMIT], [32768],
        [max size for eager sends])

    MCA_btl_portals_CONFIG_VAL([min-send-size],
        [OMPI_BTL_PORTALS_DEFAULT_MIN_SEND_SIZE], [32768],
        [min size for send fragments])
    MCA_btl_portals_CONFIG_VAL([max-send-size],
        [OMPI_BTL_PORTALS_DEFAULT_MAX_SEND_SIZE], [65536],
        [max size for send fragments])

    MCA_btl_portals_CONFIG_VAL([md-size],
        [OMPI_BTL_PORTALS_DEFAULT_RECV_MD_SIZE], [1048576],
        [Size of receive memory descriptors])
    MCA_btl_portals_CONFIG_VAL([md-num],
        [OMPI_BTL_PORTALS_DEFAULT_RECV_MD_NUM], [3],
        [Number of receive memory descriptors])

    MCA_btl_portals_CONFIG_VAL([min-rdma-size],
        [OMPI_BTL_PORTALS_DEFAULT_MIN_RDMA_SIZE], [65536],
        [min size for rdma fragments])
    MCA_btl_portals_CONFIG_VAL([max-rdma-size],
        [OMPI_BTL_PORTALS_DEFAULT_MAX_RDMA_SIZE], [2147483647],
        [max size for rdma fragments])

    MCA_btl_portals_CONFIG_VAL([max-sends-pending],
        [OMPI_BTL_PORTALS_MAX_SENDS_PENDING], [64],
        [max number of sends pending at any time])
    MCA_btl_portals_CONFIG_VAL([recv-queue-size],
        [OMPI_BTL_PORTALS_DEFAULT_RECV_QUEUE_SIZE], [8192],
        [size of event queue for receiving frags])

    MCA_btl_portals_CONFIG_VAL([free-list-init-num],
        [OMPI_BTL_PORTALS_DEFAULT_FREE_LIST_INIT_NUM], [8],
        [starting size of free lists])
    MCA_btl_portals_CONFIG_VAL([free-list-max-num],
        [OMPI_BTL_PORTALS_DEFAULT_FREE_LIST_MAX_NUM], [1024],
        [maximum size of free lists])
    MCA_btl_portals_CONFIG_VAL([free-list-inc-num],
        [OMPI_BTL_PORTALS_DEFAULT_FREE_LIST_INC_NUM], [32],
        [grow size for freelists])
])
|
||||
|
||||
|
||||
# _MCA_btl_portals_CONFIG_PLATFORM()
|
||||
# ----------------------------------
|
||||
AC_DEFUN([MCA_btl_portals_CONFIG_PLATFORM], [
|
||||
@ -197,7 +123,6 @@ AC_DEFUN([MCA_btl_portals_CONFIG],[
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <${btl_portals_header_prefix}portals3.h>],
|
||||
[int i; PtlInit(&i);])],
|
||||
[AC_MSG_RESULT([yes])
|
||||
MCA_btl_portals_CONFIG_VALS()
|
||||
btl_portals_WRAPPER_EXTRA_LDFLAGS="$btl_portals_LDFLAGS"
|
||||
btl_portals_WRAPPER_EXTRA_LIBS="$btl_portals_LIBS"
|
||||
$1],
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user