From 48d61cd99ab19dc767541b30342785e477d5a1d0 Mon Sep 17 00:00:00 2001 From: Andrew Friedley Date: Thu, 23 Mar 2006 16:21:08 +0000 Subject: [PATCH] Mostly fragment/LMR handling fixes: - Grab the mpool_registration in _frag_common_constructor() - Save the LMR context in the segment key - No need for cookie variables - can just cast the frag - No need to memcpy() data when recv'ing - Add an LMR triplet to the fragment structure and initialize it in btl_udapl_alloc(). - Whitespace/typo fixes, remove some opal_output() calls Looks like I can use triplets describing sub-regions of registered LMR's. So I do this - prior to this patch I was sending the entire free list memory over, which isn't correct :) Back to an earlier problem - when sending address information right after connection establishment, the receiving end receives a DTO completion event and appears to have good data. But the sending end never receives a DTO completion event indicating the send completed, and never completes the client side of the connection. This commit was SVN r9386. --- ompi/mca/btl/udapl/btl_udapl.c | 13 ++++++------- ompi/mca/btl/udapl/btl_udapl_component.c | 12 +++++------- ompi/mca/btl/udapl/btl_udapl_endpoint.c | 14 +++++--------- ompi/mca/btl/udapl/btl_udapl_frag.c | 17 +++++++++++++---- ompi/mca/btl/udapl/btl_udapl_frag.h | 4 +--- ompi/mca/btl/udapl/btl_udapl_proc.c | 5 ----- ompi/mca/mpool/udapl/mpool_udapl.h | 4 ++-- 7 files changed, 32 insertions(+), 37 deletions(-) diff --git a/ompi/mca/btl/udapl/btl_udapl.c b/ompi/mca/btl/udapl/btl_udapl.c index 80888d1038..25e07633d4 100644 --- a/ompi/mca/btl/udapl/btl_udapl.c +++ b/ompi/mca/btl/udapl/btl_udapl.c @@ -344,13 +344,12 @@ mca_btl_base_descriptor_t* mca_btl_udapl_alloc( size : btl->btl_max_send_size; } - /* TODO - this the right place for this? */ - if(OMPI_SUCCESS != mca_mpool_udapl_register(btl->btl_mpool, - frag->segment.seg_addr.pval, size, 0, &frag->registration)) { - /* TODO - handle this fully */ - return NULL; - } - + /* Set up the LMR triplet from the frag segment */ + /* Note that this triplet defines a sub-region of a registered LMR */ + frag->triplet.lmr_context = frag->segment.seg_key.key32[0]; + frag->triplet.virtual_address = (DAT_VADDR)frag->segment.seg_addr.pval; + frag->triplet.segment_length = frag->segment.seg_len; + frag->btl = udapl_btl; frag->base.des_src = &frag->segment; frag->base.des_src_cnt = 1; diff --git a/ompi/mca/btl/udapl/btl_udapl_component.c b/ompi/mca/btl/udapl/btl_udapl_component.c index a312bf0fb3..f7bae943b7 100644 --- a/ompi/mca/btl/udapl/btl_udapl_component.c +++ b/ompi/mca/btl/udapl/btl_udapl_component.c @@ -366,7 +366,8 @@ static int mca_btl_udapl_finish_connect(mca_btl_udapl_module_t* btl, mca_btl_udapl_addr_t* addr; size_t i; - addr = (mca_btl_udapl_addr_t*)frag->hdr; + /*addr = (mca_btl_udapl_addr_t*)frag->hdr;*/ + addr = (mca_btl_udapl_addr_t*)frag->segment.seg_addr.pval; OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); for(proc = (mca_btl_udapl_proc_t*) @@ -401,7 +402,6 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl, DAT_CR_HANDLE cr_handle) { mca_btl_udapl_frag_t* frag; - DAT_DTO_COOKIE cookie; DAT_EP_HANDLE endpoint; int rc; @@ -423,14 +423,11 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl, frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc( (mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t)); - memcpy(frag->hdr, &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t)); frag->endpoint = NULL; frag->type = MCA_BTL_UDAPL_CONN_RECV; - cookie.as_ptr = frag; - rc = dat_ep_post_recv(endpoint, 1, - &((mca_mpool_udapl_registration_t*)frag->registration)->lmr_triplet, - cookie, DAT_COMPLETION_DEFAULT_FLAG); + rc = dat_ep_post_recv(endpoint, 1, &frag->triplet, + (DAT_DTO_COOKIE)(void*)frag, DAT_COMPLETION_DEFAULT_FLAG); if(DAT_SUCCESS != rc) { mca_btl_udapl_error(rc, "dat_ep_post_send"); return OMPI_ERROR; @@ -481,6 +478,7 @@ int mca_btl_udapl_component_progress() how about just worrying about eager frags for now? */ dto = &event.event_data.dto_completion_event_data; + OPAL_OUTPUT((0, "DTO transferred %d bytes\n", dto->transfered_length)); /* Was the DTO successful? */ if(DAT_DTO_SUCCESS != dto->status) { diff --git a/ompi/mca/btl/udapl/btl_udapl_endpoint.c b/ompi/mca/btl/udapl/btl_udapl_endpoint.c index 62e1de30c6..ff82933cc2 100644 --- a/ompi/mca/btl/udapl/btl_udapl_endpoint.c +++ b/ompi/mca/btl/udapl/btl_udapl_endpoint.c @@ -73,7 +73,6 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) { mca_btl_udapl_module_t* btl = endpoint->endpoint_btl; mca_btl_udapl_frag_t* frag; - DAT_DTO_COOKIE cookie; int rc; /* Create a new uDAPL endpoint and start the connection process */ @@ -94,25 +93,22 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) } /* Send our local address data over this EP */ - /* Can't use btl_udapl_send here, the send will just get queued */ + /* Can't use btl_udapl_send here, will start an infinite loop! */ frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc( (mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t)); - memcpy(frag->hdr, &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t)); + memcpy(frag->segment.seg_addr.pval, + &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t)); frag->endpoint = endpoint; frag->type = MCA_BTL_UDAPL_CONN_SEND; - cookie.as_ptr = frag; /* Do the actual send now.. */ - OPAL_OUTPUT((0, "posting send!\n")); - rc = dat_ep_post_send(endpoint->endpoint_ep, 1, - &((mca_mpool_udapl_registration_t*)frag->registration)->lmr_triplet, - cookie, DAT_COMPLETION_DEFAULT_FLAG); + rc = dat_ep_post_send(endpoint->endpoint_ep, 1, &frag->triplet, + (DAT_DTO_COOKIE)(void*)frag, DAT_COMPLETION_DEFAULT_FLAG); if(DAT_SUCCESS != rc) { mca_btl_udapl_error(rc, "dat_ep_post_send"); goto failure; } - OPAL_OUTPUT((0, "after post send\n")); endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTING; return OMPI_SUCCESS; diff --git a/ompi/mca/btl/udapl/btl_udapl_frag.c b/ompi/mca/btl/udapl/btl_udapl_frag.c index 01e4de624b..81ee58ed77 100644 --- a/ompi/mca/btl/udapl/btl_udapl_frag.c +++ b/ompi/mca/btl/udapl/btl_udapl_frag.c @@ -18,15 +18,26 @@ #include "btl_udapl.h" #include "btl_udapl_frag.h" - +#include "ompi/mca/mpool/udapl/mpool_udapl.h" static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag) -{ +{ + mca_mpool_udapl_registration_t* reg = frag->base.super.user_data; + frag->base.des_src = NULL; frag->base.des_src_cnt = 0; frag->base.des_dst = NULL; frag->base.des_dst_cnt = 0; + frag->registration = (mca_mpool_base_registration_t*)reg; + + /* Don't understand why yet, but there are cases where reg is NULL - + that is, this memory has not been registered. So be careful not + to dereference a NULL pointer. */ + if(NULL != reg) { + /* Save the LMR context so we can set up LMR subset triplets later */ + frag->segment.seg_key.key32[0] = reg->lmr_triplet.lmr_context; + } } static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag) @@ -34,7 +45,6 @@ static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag) frag->hdr = (mca_btl_base_header_t*)(frag + 1); frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1); frag->segment.seg_len = mca_btl_udapl_module.super.btl_eager_limit - sizeof(mca_btl_base_header_t); - frag->registration = NULL; frag->size = mca_btl_udapl_component.udapl_eager_frag_size; mca_btl_udapl_frag_common_constructor(frag); } @@ -44,7 +54,6 @@ static void mca_btl_udapl_frag_max_constructor(mca_btl_udapl_frag_t* frag) frag->hdr = (mca_btl_base_header_t*)(frag + 1); frag->segment.seg_addr.pval = (unsigned char*)(frag->hdr + 1); frag->segment.seg_len = mca_btl_udapl_module.super.btl_max_send_size - sizeof(mca_btl_base_header_t); - frag->registration = NULL; frag->size = mca_btl_udapl_component.udapl_max_frag_size; mca_btl_udapl_frag_common_constructor(frag); } diff --git a/ompi/mca/btl/udapl/btl_udapl_frag.h b/ompi/mca/btl/udapl/btl_udapl_frag.h index 41daedb6d6..f523263c8a 100644 --- a/ompi/mca/btl/udapl/btl_udapl_frag.h +++ b/ompi/mca/btl/udapl/btl_udapl_frag.h @@ -48,6 +48,7 @@ struct mca_btl_udapl_frag_t { struct mca_btl_udapl_module_t* btl; struct mca_btl_base_endpoint_t *endpoint; struct mca_mpool_base_registration_t* registration; + DAT_LMR_TRIPLET triplet; mca_btl_base_header_t *hdr; size_t size; @@ -58,15 +59,12 @@ OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_t); typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_eager_t; - OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_eager_t); typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_max_t; - OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_max_t); typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_user_t; - OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_user_t); diff --git a/ompi/mca/btl/udapl/btl_udapl_proc.c b/ompi/mca/btl/udapl/btl_udapl_proc.c index 56330829fd..236f99bc74 100644 --- a/ompi/mca/btl/udapl/btl_udapl_proc.c +++ b/ompi/mca/btl/udapl/btl_udapl_proc.c @@ -132,11 +132,6 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc) return NULL; } - if(mca_btl_udapl_component.udapl_debug) { - opal_output(0, "udapl_proc_create got %d addrs\n", - size / sizeof(mca_btl_udapl_addr_t)); - } - if((size % sizeof(mca_btl_udapl_addr_t)) != 0) { opal_output(0, "[%s:%d] invalid udapl address for peer [%d,%d,%d]", __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); diff --git a/ompi/mca/mpool/udapl/mpool_udapl.h b/ompi/mca/mpool/udapl/mpool_udapl.h index 70599d5135..8e4cae5021 100644 --- a/ompi/mca/mpool/udapl/mpool_udapl.h +++ b/ompi/mca/mpool/udapl/mpool_udapl.h @@ -125,12 +125,12 @@ int mca_mpool_udapl_find( int mca_mpool_udapl_release( struct mca_mpool_base_module_t* mpool, - mca_mpool_base_registration_t* registraion + mca_mpool_base_registration_t* registration ); int mca_mpool_udapl_retain( struct mca_mpool_base_module_t* mpool, - mca_mpool_base_registration_t* registraion + mca_mpool_base_registration_t* registration );