1
1

Mostly fragment/LMR handling fixes:

 - Grab the mpool_registration in _frag_common_constructor()
 - Save the LMR context in the segment key
 - No need for cookie variables - can just cast the frag pointer
 - No need to memcpy() data when receiving
 - Add an LMR triplet to the fragment structure and initialize it
   in btl_udapl_alloc().
 - Whitespace/typo fixes, remove some opal_output() calls

It looks like I can use triplets that describe sub-regions of registered LMRs,
so that is what this patch does.  Prior to this patch I was sending the entire
free list memory over, which isn't correct :)

Back to an earlier problem: when sending address information right after
connection establishment, the receiving end receives a DTO completion event and
appears to have good data.  But the sending end never receives a DTO completion
event indicating that the send completed, so it never completes the client side
of the connection.

This commit was SVN r9386.
Этот коммит содержится в:
Andrew Friedley 2006-03-23 16:21:08 +00:00
родитель adf621fcce
Коммит 48d61cd99a
7 изменённых файлов: 32 добавлений и 37 удалений

Просмотреть файл

@ -344,13 +344,12 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc(
size : btl->btl_max_send_size; size : btl->btl_max_send_size;
} }
/* TODO - this the right place for this? */ /* Set up the LMR triplet from the frag segment */
if(OMPI_SUCCESS != mca_mpool_udapl_register(btl->btl_mpool, /* Note that this triplet defines a sub-region of a registered LMR */
frag->segment.seg_addr.pval, size, 0, &frag->registration)) { frag->triplet.lmr_context = frag->segment.seg_key.key32[0];
/* TODO - handle this fully */ frag->triplet.virtual_address = (DAT_VADDR)frag->segment.seg_addr.pval;
return NULL; frag->triplet.segment_length = frag->segment.seg_len;
}
frag->btl = udapl_btl; frag->btl = udapl_btl;
frag->base.des_src = &frag->segment; frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1; frag->base.des_src_cnt = 1;

Просмотреть файл

@ -366,7 +366,8 @@ static int mca_btl_udapl_finish_connect(mca_btl_udapl_module_t* btl,
mca_btl_udapl_addr_t* addr; mca_btl_udapl_addr_t* addr;
size_t i; size_t i;
addr = (mca_btl_udapl_addr_t*)frag->hdr; /*addr = (mca_btl_udapl_addr_t*)frag->hdr;*/
addr = (mca_btl_udapl_addr_t*)frag->segment.seg_addr.pval;
OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
for(proc = (mca_btl_udapl_proc_t*) for(proc = (mca_btl_udapl_proc_t*)
@ -401,7 +402,6 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl,
DAT_CR_HANDLE cr_handle) DAT_CR_HANDLE cr_handle)
{ {
mca_btl_udapl_frag_t* frag; mca_btl_udapl_frag_t* frag;
DAT_DTO_COOKIE cookie;
DAT_EP_HANDLE endpoint; DAT_EP_HANDLE endpoint;
int rc; int rc;
@ -423,14 +423,11 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl,
frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc( frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
(mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t)); (mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
memcpy(frag->hdr, &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t));
frag->endpoint = NULL; frag->endpoint = NULL;
frag->type = MCA_BTL_UDAPL_CONN_RECV; frag->type = MCA_BTL_UDAPL_CONN_RECV;
cookie.as_ptr = frag;
rc = dat_ep_post_recv(endpoint, 1, rc = dat_ep_post_recv(endpoint, 1, &frag->triplet,
&((mca_mpool_udapl_registration_t*)frag->registration)->lmr_triplet, (DAT_DTO_COOKIE)(void*)frag, DAT_COMPLETION_DEFAULT_FLAG);
cookie, DAT_COMPLETION_DEFAULT_FLAG);
if(DAT_SUCCESS != rc) { if(DAT_SUCCESS != rc) {
mca_btl_udapl_error(rc, "dat_ep_post_send"); mca_btl_udapl_error(rc, "dat_ep_post_send");
return OMPI_ERROR; return OMPI_ERROR;
@ -481,6 +478,7 @@ int mca_btl_udapl_component_progress()
how about just worrying about eager frags for now? how about just worrying about eager frags for now?
*/ */
dto = &event.event_data.dto_completion_event_data; dto = &event.event_data.dto_completion_event_data;
OPAL_OUTPUT((0, "DTO transferred %d bytes\n", dto->transfered_length));
/* Was the DTO successful? */ /* Was the DTO successful? */
if(DAT_DTO_SUCCESS != dto->status) { if(DAT_DTO_SUCCESS != dto->status) {

Просмотреть файл

@ -73,7 +73,6 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
{ {
mca_btl_udapl_module_t* btl = endpoint->endpoint_btl; mca_btl_udapl_module_t* btl = endpoint->endpoint_btl;
mca_btl_udapl_frag_t* frag; mca_btl_udapl_frag_t* frag;
DAT_DTO_COOKIE cookie;
int rc; int rc;
/* Create a new uDAPL endpoint and start the connection process */ /* Create a new uDAPL endpoint and start the connection process */
@ -94,25 +93,22 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
} }
/* Send our local address data over this EP */ /* Send our local address data over this EP */
/* Can't use btl_udapl_send here, the send will just get queued */ /* Can't use btl_udapl_send here, will start an infinite loop! */
frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc( frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
(mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t)); (mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
memcpy(frag->hdr, &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t)); memcpy(frag->segment.seg_addr.pval,
&btl->udapl_addr, sizeof(mca_btl_udapl_addr_t));
frag->endpoint = endpoint; frag->endpoint = endpoint;
frag->type = MCA_BTL_UDAPL_CONN_SEND; frag->type = MCA_BTL_UDAPL_CONN_SEND;
cookie.as_ptr = frag;
/* Do the actual send now.. */ /* Do the actual send now.. */
OPAL_OUTPUT((0, "posting send!\n")); rc = dat_ep_post_send(endpoint->endpoint_ep, 1, &frag->triplet,
rc = dat_ep_post_send(endpoint->endpoint_ep, 1, (DAT_DTO_COOKIE)(void*)frag, DAT_COMPLETION_DEFAULT_FLAG);
&((mca_mpool_udapl_registration_t*)frag->registration)->lmr_triplet,
cookie, DAT_COMPLETION_DEFAULT_FLAG);
if(DAT_SUCCESS != rc) { if(DAT_SUCCESS != rc) {
mca_btl_udapl_error(rc, "dat_ep_post_send"); mca_btl_udapl_error(rc, "dat_ep_post_send");
goto failure; goto failure;
} }
OPAL_OUTPUT((0, "after post send\n"));
endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTING; endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTING;
return OMPI_SUCCESS; return OMPI_SUCCESS;

Просмотреть файл

@ -18,15 +18,26 @@
#include "btl_udapl.h" #include "btl_udapl.h"
#include "btl_udapl_frag.h" #include "btl_udapl_frag.h"
#include "ompi/mca/mpool/udapl/mpool_udapl.h"
static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag) static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag)
{ {
mca_mpool_udapl_registration_t* reg = frag->base.super.user_data;
frag->base.des_src = NULL; frag->base.des_src = NULL;
frag->base.des_src_cnt = 0; frag->base.des_src_cnt = 0;
frag->base.des_dst = NULL; frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0; frag->base.des_dst_cnt = 0;
frag->registration = (mca_mpool_base_registration_t*)reg;
/* Don't understand why yet, but there are cases where reg is NULL -
that is, this memory has not been registered. So be careful not
to dereference a NULL pointer. */
if(NULL != reg) {
/* Save the LMR context so we can set up LMR subset triplets later */
frag->segment.seg_key.key32[0] = reg->lmr_triplet.lmr_context;
}
} }
static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag) static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag)
@ -34,7 +45,6 @@ static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag)
frag->hdr = (mca_btl_base_header_t*)(frag + 1); frag->hdr = (mca_btl_base_header_t*)(frag + 1);
frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1); frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit - sizeof(mca_btl_base_header_t); frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit - sizeof(mca_btl_base_header_t);
frag->registration = NULL;
frag->size = mca_btl_udapl_component.udapl_eager_frag_size; frag->size = mca_btl_udapl_component.udapl_eager_frag_size;
mca_btl_udapl_frag_common_constructor(frag); mca_btl_udapl_frag_common_constructor(frag);
} }
@ -44,7 +54,6 @@ static void mca_btl_udapl_frag_max_constructor(mca_btl_udapl_frag_t* frag)
frag->hdr = (mca_btl_base_header_t*)(frag + 1); frag->hdr = (mca_btl_base_header_t*)(frag + 1);
frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1); frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1);
frag->segment.seg_len = mca_btl_udapl_module.super.btl_max_send_size - sizeof(mca_btl_base_header_t); frag->segment.seg_len = mca_btl_udapl_module.super.btl_max_send_size - sizeof(mca_btl_base_header_t);
frag->registration = NULL;
frag->size = mca_btl_udapl_component.udapl_max_frag_size; frag->size = mca_btl_udapl_component.udapl_max_frag_size;
mca_btl_udapl_frag_common_constructor(frag); mca_btl_udapl_frag_common_constructor(frag);
} }

Просмотреть файл

@ -48,6 +48,7 @@ struct mca_btl_udapl_frag_t {
struct mca_btl_udapl_module_t* btl; struct mca_btl_udapl_module_t* btl;
struct mca_btl_base_endpoint_t *endpoint; struct mca_btl_base_endpoint_t *endpoint;
struct mca_mpool_base_registration_t* registration; struct mca_mpool_base_registration_t* registration;
DAT_LMR_TRIPLET triplet;
mca_btl_base_header_t *hdr; mca_btl_base_header_t *hdr;
size_t size; size_t size;
@ -58,15 +59,12 @@ OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_t);
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_eager_t; typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_eager_t); OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_eager_t);
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_max_t; typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_max_t); OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_max_t);
typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_user_t; typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_user_t;
OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_user_t); OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_user_t);

Просмотреть файл

@ -132,11 +132,6 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc)
return NULL; return NULL;
} }
if(mca_btl_udapl_component.udapl_debug) {
opal_output(0, "udapl_proc_create got %d addrs\n",
size / sizeof(mca_btl_udapl_addr_t));
}
if((size % sizeof(mca_btl_udapl_addr_t)) != 0) { if((size % sizeof(mca_btl_udapl_addr_t)) != 0) {
opal_output(0, "[%s:%d] invalid udapl address for peer [%d,%d,%d]", opal_output(0, "[%s:%d] invalid udapl address for peer [%d,%d,%d]",
__FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name));

Просмотреть файл

@ -125,12 +125,12 @@ int mca_mpool_udapl_find(
int mca_mpool_udapl_release( int mca_mpool_udapl_release(
struct mca_mpool_base_module_t* mpool, struct mca_mpool_base_module_t* mpool,
mca_mpool_base_registration_t* registraion mca_mpool_base_registration_t* registration
); );
int mca_mpool_udapl_retain( int mca_mpool_udapl_retain(
struct mca_mpool_base_module_t* mpool, struct mca_mpool_base_module_t* mpool,
mca_mpool_base_registration_t* registraion mca_mpool_base_registration_t* registration
); );