Mostly fragment/LMR handling fixes:

- Grab the mpool_registration in _frag_common_constructor().
- Save the LMR context in the segment key.
- No need for cookie variables - can just cast the frag.
- No need to memcpy() data when receiving.
- Add an LMR triplet to the fragment structure and initialize it in btl_udapl_alloc().
- Whitespace/typo fixes, remove some opal_output() calls.

It looks like I can use triplets describing sub-regions of registered LMRs, so that is what this patch does. Prior to this patch I was sending the entire free list memory over, which isn't correct :)

Back to an earlier problem: when sending address information right after connection establishment, the receiving end receives a DTO completion event and appears to have good data. But the sending end never receives a DTO completion event indicating that the send completed, and so never completes the client side of the connection.

This commit was SVN r9386.
Этот коммит содержится в:
родитель
adf621fcce
Коммит
48d61cd99a
@ -344,13 +344,12 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc(
|
|||||||
size : btl->btl_max_send_size;
|
size : btl->btl_max_send_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO - this the right place for this? */
|
/* Set up the LMR triplet from the frag segment */
|
||||||
if(OMPI_SUCCESS != mca_mpool_udapl_register(btl->btl_mpool,
|
/* Note that this triplet defines a sub-region of a registered LMR */
|
||||||
frag->segment.seg_addr.pval, size, 0, &frag->registration)) {
|
frag->triplet.lmr_context = frag->segment.seg_key.key32[0];
|
||||||
/* TODO - handle this fully */
|
frag->triplet.virtual_address = (DAT_VADDR)frag->segment.seg_addr.pval;
|
||||||
return NULL;
|
frag->triplet.segment_length = frag->segment.seg_len;
|
||||||
}
|
|
||||||
|
|
||||||
frag->btl = udapl_btl;
|
frag->btl = udapl_btl;
|
||||||
frag->base.des_src = &frag->segment;
|
frag->base.des_src = &frag->segment;
|
||||||
frag->base.des_src_cnt = 1;
|
frag->base.des_src_cnt = 1;
|
||||||
|
@ -366,7 +366,8 @@ static int mca_btl_udapl_finish_connect(mca_btl_udapl_module_t* btl,
|
|||||||
mca_btl_udapl_addr_t* addr;
|
mca_btl_udapl_addr_t* addr;
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
addr = (mca_btl_udapl_addr_t*)frag->hdr;
|
/*addr = (mca_btl_udapl_addr_t*)frag->hdr;*/
|
||||||
|
addr = (mca_btl_udapl_addr_t*)frag->segment.seg_addr.pval;
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
|
OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
|
||||||
for(proc = (mca_btl_udapl_proc_t*)
|
for(proc = (mca_btl_udapl_proc_t*)
|
||||||
@ -401,7 +402,6 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl,
|
|||||||
DAT_CR_HANDLE cr_handle)
|
DAT_CR_HANDLE cr_handle)
|
||||||
{
|
{
|
||||||
mca_btl_udapl_frag_t* frag;
|
mca_btl_udapl_frag_t* frag;
|
||||||
DAT_DTO_COOKIE cookie;
|
|
||||||
DAT_EP_HANDLE endpoint;
|
DAT_EP_HANDLE endpoint;
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
@ -423,14 +423,11 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl,
|
|||||||
frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
|
frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
|
||||||
(mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
|
(mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
|
||||||
|
|
||||||
memcpy(frag->hdr, &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t));
|
|
||||||
frag->endpoint = NULL;
|
frag->endpoint = NULL;
|
||||||
frag->type = MCA_BTL_UDAPL_CONN_RECV;
|
frag->type = MCA_BTL_UDAPL_CONN_RECV;
|
||||||
cookie.as_ptr = frag;
|
|
||||||
|
|
||||||
rc = dat_ep_post_recv(endpoint, 1,
|
rc = dat_ep_post_recv(endpoint, 1, &frag->triplet,
|
||||||
&((mca_mpool_udapl_registration_t*)frag->registration)->lmr_triplet,
|
(DAT_DTO_COOKIE)(void*)frag, DAT_COMPLETION_DEFAULT_FLAG);
|
||||||
cookie, DAT_COMPLETION_DEFAULT_FLAG);
|
|
||||||
if(DAT_SUCCESS != rc) {
|
if(DAT_SUCCESS != rc) {
|
||||||
mca_btl_udapl_error(rc, "dat_ep_post_send");
|
mca_btl_udapl_error(rc, "dat_ep_post_send");
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
@ -481,6 +478,7 @@ int mca_btl_udapl_component_progress()
|
|||||||
how about just worrying about eager frags for now?
|
how about just worrying about eager frags for now?
|
||||||
*/
|
*/
|
||||||
dto = &event.event_data.dto_completion_event_data;
|
dto = &event.event_data.dto_completion_event_data;
|
||||||
|
OPAL_OUTPUT((0, "DTO transferred %d bytes\n", dto->transfered_length));
|
||||||
|
|
||||||
/* Was the DTO successful? */
|
/* Was the DTO successful? */
|
||||||
if(DAT_DTO_SUCCESS != dto->status) {
|
if(DAT_DTO_SUCCESS != dto->status) {
|
||||||
|
@ -73,7 +73,6 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
|
|||||||
{
|
{
|
||||||
mca_btl_udapl_module_t* btl = endpoint->endpoint_btl;
|
mca_btl_udapl_module_t* btl = endpoint->endpoint_btl;
|
||||||
mca_btl_udapl_frag_t* frag;
|
mca_btl_udapl_frag_t* frag;
|
||||||
DAT_DTO_COOKIE cookie;
|
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
/* Create a new uDAPL endpoint and start the connection process */
|
/* Create a new uDAPL endpoint and start the connection process */
|
||||||
@ -94,25 +93,22 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Send our local address data over this EP */
|
/* Send our local address data over this EP */
|
||||||
/* Can't use btl_udapl_send here, the send will just get queued */
|
/* Can't use btl_udapl_send here, will start an infinite loop! */
|
||||||
frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
|
frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
|
||||||
(mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
|
(mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
|
||||||
|
|
||||||
memcpy(frag->hdr, &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t));
|
memcpy(frag->segment.seg_addr.pval,
|
||||||
|
&btl->udapl_addr, sizeof(mca_btl_udapl_addr_t));
|
||||||
frag->endpoint = endpoint;
|
frag->endpoint = endpoint;
|
||||||
frag->type = MCA_BTL_UDAPL_CONN_SEND;
|
frag->type = MCA_BTL_UDAPL_CONN_SEND;
|
||||||
cookie.as_ptr = frag;
|
|
||||||
|
|
||||||
/* Do the actual send now.. */
|
/* Do the actual send now.. */
|
||||||
OPAL_OUTPUT((0, "posting send!\n"));
|
rc = dat_ep_post_send(endpoint->endpoint_ep, 1, &frag->triplet,
|
||||||
rc = dat_ep_post_send(endpoint->endpoint_ep, 1,
|
(DAT_DTO_COOKIE)(void*)frag, DAT_COMPLETION_DEFAULT_FLAG);
|
||||||
&((mca_mpool_udapl_registration_t*)frag->registration)->lmr_triplet,
|
|
||||||
cookie, DAT_COMPLETION_DEFAULT_FLAG);
|
|
||||||
if(DAT_SUCCESS != rc) {
|
if(DAT_SUCCESS != rc) {
|
||||||
mca_btl_udapl_error(rc, "dat_ep_post_send");
|
mca_btl_udapl_error(rc, "dat_ep_post_send");
|
||||||
goto failure;
|
goto failure;
|
||||||
}
|
}
|
||||||
OPAL_OUTPUT((0, "after post send\n"));
|
|
||||||
|
|
||||||
endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTING;
|
endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTING;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -18,15 +18,26 @@
|
|||||||
|
|
||||||
#include "btl_udapl.h"
|
#include "btl_udapl.h"
|
||||||
#include "btl_udapl_frag.h"
|
#include "btl_udapl_frag.h"
|
||||||
|
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
|
||||||
|
|
||||||
|
|
||||||
static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
|
static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
|
||||||
{
|
{
|
||||||
|
mca_mpool_udapl_registration_t* reg = frag->base.super.user_data;
|
||||||
|
|
||||||
frag->base.des_src = NULL;
|
frag->base.des_src = NULL;
|
||||||
frag->base.des_src_cnt = 0;
|
frag->base.des_src_cnt = 0;
|
||||||
frag->base.des_dst = NULL;
|
frag->base.des_dst = NULL;
|
||||||
frag->base.des_dst_cnt = 0;
|
frag->base.des_dst_cnt = 0;
|
||||||
|
frag->registration = (mca_mpool_base_registration_t*)reg;
|
||||||
|
|
||||||
|
/* Don't understand why yet, but there are cases where reg is NULL -
|
||||||
|
that is, this memory has not been registered. So be careful not
|
||||||
|
to dereference a NULL pointer. */
|
||||||
|
if(NULL != reg) {
|
||||||
|
/* Save the LMR context so we can set up LMR subset triplets later */
|
||||||
|
frag->segment.seg_key.key32[0] = reg->lmr_triplet.lmr_context;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag)
|
static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag)
|
||||||
@ -34,7 +45,6 @@ static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag)
|
|||||||
frag->hdr = (mca_btl_base_header_t*)(frag + 1);
|
frag->hdr = (mca_btl_base_header_t*)(frag + 1);
|
||||||
frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
|
frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
|
||||||
frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit - sizeof(mca_btl_base_header_t);
|
frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit - sizeof(mca_btl_base_header_t);
|
||||||
frag->registration = NULL;
|
|
||||||
frag->size = mca_btl_udapl_component.udapl_eager_frag_size;
|
frag->size = mca_btl_udapl_component.udapl_eager_frag_size;
|
||||||
mca_btl_udapl_frag_common_constructor(frag);
|
mca_btl_udapl_frag_common_constructor(frag);
|
||||||
}
|
}
|
||||||
@ -44,7 +54,6 @@ static void mca_btl_udapl_frag_max_constructor(mca_btl_udapl_frag_t* frag)
|
|||||||
frag->hdr = (mca_btl_base_header_t*)(frag + 1);
|
frag->hdr = (mca_btl_base_header_t*)(frag + 1);
|
||||||
frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
|
frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
|
||||||
frag->segment.seg_len = mca_btl_udapl_module.super.btl_max_send_size - sizeof(mca_btl_base_header_t);
|
frag->segment.seg_len = mca_btl_udapl_module.super.btl_max_send_size - sizeof(mca_btl_base_header_t);
|
||||||
frag->registration = NULL;
|
|
||||||
frag->size = mca_btl_udapl_component.udapl_max_frag_size;
|
frag->size = mca_btl_udapl_component.udapl_max_frag_size;
|
||||||
mca_btl_udapl_frag_common_constructor(frag);
|
mca_btl_udapl_frag_common_constructor(frag);
|
||||||
}
|
}
|
||||||
|
@ -48,6 +48,7 @@ struct mca_btl_udapl_frag_t {
|
|||||||
struct mca_btl_udapl_module_t* btl;
|
struct mca_btl_udapl_module_t* btl;
|
||||||
struct mca_btl_base_endpoint_t *endpoint;
|
struct mca_btl_base_endpoint_t *endpoint;
|
||||||
struct mca_mpool_base_registration_t* registration;
|
struct mca_mpool_base_registration_t* registration;
|
||||||
|
DAT_LMR_TRIPLET triplet;
|
||||||
|
|
||||||
mca_btl_base_header_t *hdr;
|
mca_btl_base_header_t *hdr;
|
||||||
size_t size;
|
size_t size;
|
||||||
@ -58,15 +59,12 @@ OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_t);
|
|||||||
|
|
||||||
|
|
||||||
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_eager_t;
|
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_eager_t;
|
||||||
|
|
||||||
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_eager_t);
|
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_eager_t);
|
||||||
|
|
||||||
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_max_t;
|
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_max_t;
|
||||||
|
|
||||||
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_max_t);
|
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_max_t);
|
||||||
|
|
||||||
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_user_t;
|
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_user_t;
|
||||||
|
|
||||||
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_user_t);
|
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_user_t);
|
||||||
|
|
||||||
|
|
||||||
|
@ -132,11 +132,6 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(mca_btl_udapl_component.udapl_debug) {
|
|
||||||
opal_output(0, "udapl_proc_create got %d addrs\n",
|
|
||||||
size / sizeof(mca_btl_udapl_addr_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
|
if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
|
||||||
opal_output(0, "[%s:%d] invalid udapl address for peer [%d,%d,%d]",
|
opal_output(0, "[%s:%d] invalid udapl address for peer [%d,%d,%d]",
|
||||||
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
|
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));
|
||||||
|
@ -125,12 +125,12 @@ int mca_mpool_udapl_find(
|
|||||||
|
|
||||||
int mca_mpool_udapl_release(
|
int mca_mpool_udapl_release(
|
||||||
struct mca_mpool_base_module_t* mpool,
|
struct mca_mpool_base_module_t* mpool,
|
||||||
mca_mpool_base_registration_t* registraion
|
mca_mpool_base_registration_t* registration
|
||||||
);
|
);
|
||||||
|
|
||||||
int mca_mpool_udapl_retain(
|
int mca_mpool_udapl_retain(
|
||||||
struct mca_mpool_base_module_t* mpool,
|
struct mca_mpool_base_module_t* mpool,
|
||||||
mca_mpool_base_registration_t* registraion
|
mca_mpool_base_registration_t* registration
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user