From b7e0484c37a4283dbfc16f26b8420ee55d81c89c Mon Sep 17 00:00:00 2001 From: Andrew Friedley Date: Fri, 7 Jul 2006 21:48:16 +0000 Subject: [PATCH] Give up on dat_ep_query() and instead manually send our address information across the wire after connection establishment. I've introduced a race condition - seeing occasional LOCAL_LENGTH errors on the receive side. I think I'm mixing up eager/max somehow - will look at it more on Monday. This commit was SVN r10690. --- ompi/mca/btl/udapl/btl_udapl_component.c | 62 +++++++++++++++++++++--- ompi/mca/btl/udapl/btl_udapl_endpoint.c | 17 ++----- ompi/mca/btl/udapl/btl_udapl_endpoint.h | 1 + ompi/mca/btl/udapl/btl_udapl_frag.h | 4 +- 4 files changed, 63 insertions(+), 21 deletions(-) diff --git a/ompi/mca/btl/udapl/btl_udapl_component.c b/ompi/mca/btl/udapl/btl_udapl_component.c index 9151517b81..d3f7e9c75c 100644 --- a/ompi/mca/btl/udapl/btl_udapl_component.c +++ b/ompi/mca/btl/udapl/btl_udapl_component.c @@ -385,6 +385,48 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl, } +static inline int mca_btl_udapl_sendrecv(mca_btl_udapl_module_t* btl, + DAT_EP_HANDLE* endpoint) +{ + mca_btl_udapl_frag_t* frag; + DAT_DTO_COOKIE cookie; + int rc; + + /* Post a receive to get the peer's address data */ + frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc( + (mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t)); + cookie.as_ptr = frag; + + frag->type = MCA_BTL_UDAPL_CONN_RECV; + + rc = dat_ep_post_recv(endpoint, 1, + &frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG); + if(DAT_SUCCESS != rc) { + MCA_BTL_UDAPL_ERROR(rc, "dat_ep_post_recv"); + return OMPI_ERROR; + } + + + /* Send our local address data over this EP */ + frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc( + (mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t)); + cookie.as_ptr = frag; + + memcpy(frag->segment.seg_addr.pval, + &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t)); + frag->type = MCA_BTL_UDAPL_CONN_SEND; + + rc = 
dat_ep_post_send(endpoint, 1, + &frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG); + if(DAT_SUCCESS != rc) { + MCA_BTL_UDAPL_ERROR(rc, "dat_ep_post_send"); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + + /* * uDAPL component progress. */ @@ -422,8 +464,8 @@ int mca_btl_udapl_component_progress() /* Was the DTO successful? */ if(DAT_DTO_SUCCESS != dto->status) { OPAL_OUTPUT((0, - "btl_udapl ***** DTO error %d *****\n", - dto->status)); + "btl_udapl ***** DTO error %d %d %d %p*****\n", + dto->status, frag->type, frag->size, dto->ep_handle)); break; } @@ -510,9 +552,6 @@ int mca_btl_udapl_component_progress() /*OPAL_OUTPUT((0, "btl_udapl UDAPL_RECV %d", dto->transfered_length));*/ - /* OPAL_OUTPUT((0, "recv from %s %d %p\n", - inet_ntoa(addr->sin_addr), ntohs(addr->sin_port), - frag->endpoint));*/ frag->segment.seg_addr.pval = frag->hdr + 1; frag->segment.seg_len = dto->transfered_length - sizeof(mca_btl_base_header_t); @@ -543,6 +582,17 @@ int mca_btl_udapl_component_progress() break; } + case MCA_BTL_UDAPL_CONN_RECV: + mca_btl_udapl_endpoint_finish_connect(btl, + frag->segment.seg_addr.pval, + event.event_data.connect_event_data.ep_handle); + /* No break - fall through to free */ + case MCA_BTL_UDAPL_CONN_SEND: + frag->segment.seg_len = + mca_btl_udapl_module.super.btl_eager_limit; + mca_btl_udapl_free((mca_btl_base_module_t*)btl, + (mca_btl_base_descriptor_t*)frag); + break; default: OPAL_OUTPUT((0, "WARNING unknown frag type: %d\n", frag->type)); @@ -570,7 +620,7 @@ int mca_btl_udapl_component_progress() /* Both the client and server side of a connection generate this event */ - mca_btl_udapl_endpoint_finish_connect(btl, + mca_btl_udapl_sendrecv(btl, event.event_data.connect_event_data.ep_handle); count++; diff --git a/ompi/mca/btl/udapl/btl_udapl_endpoint.c b/ompi/mca/btl/udapl/btl_udapl_endpoint.c index e3c114fe64..9e10336004 100644 --- a/ompi/mca/btl/udapl/btl_udapl_endpoint.c +++ b/ompi/mca/btl/udapl/btl_udapl_endpoint.c @@ -46,7 +46,7 @@ void 
mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint, static int mca_btl_udapl_endpoint_finish_eager(mca_btl_udapl_endpoint_t*); static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t*); - + int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint, mca_btl_udapl_frag_t* frag) { @@ -289,24 +289,14 @@ failure_create: */ int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl, + mca_btl_udapl_addr_t* addr, DAT_EP_HANDLE endpoint) { mca_btl_udapl_proc_t* proc; mca_btl_base_endpoint_t* ep; - DAT_EP_PARAM param; size_t i; int rc; - /* Query the DAT EP for address information. */ - /* TODO - refer to the hack comment about setting the port in btl_udapl.c */ - rc = dat_ep_query(endpoint, - DAT_EP_FIELD_REMOTE_IA_ADDRESS_PTR | DAT_EP_FIELD_REMOTE_PORT_QUAL, - &param); - if(DAT_SUCCESS != rc) { - MCA_BTL_UDAPL_ERROR(rc, "dat_ep_query"); - return OMPI_ERROR; - } - /* Search for the matching BTL EP */ OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); for(proc = (mca_btl_udapl_proc_t*) @@ -321,8 +311,7 @@ int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl, /* Does this endpoint match? 
*/ /* TODO - Check that the DAT_CONN_QUAL's match too */ if(ep->endpoint_btl == btl && - !memcmp(param.remote_ia_address_ptr, - &ep->endpoint_addr.addr, sizeof(DAT_SOCK_ADDR))) { + !memcmp(addr, &ep->endpoint_addr, sizeof(DAT_SOCK_ADDR))) { OPAL_THREAD_LOCK(&ep->endpoint_lock); if(MCA_BTL_UDAPL_CONN_EAGER == ep->endpoint_state) { diff --git a/ompi/mca/btl/udapl/btl_udapl_endpoint.h b/ompi/mca/btl/udapl/btl_udapl_endpoint.h index 7f7b9b0e1d..11afa65184 100644 --- a/ompi/mca/btl/udapl/btl_udapl_endpoint.h +++ b/ompi/mca/btl/udapl/btl_udapl_endpoint.h @@ -117,6 +117,7 @@ void mca_btl_udapl_endpoint_post_oob_recv(void); */ int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl, + mca_btl_udapl_addr_t* addr, DAT_EP_HANDLE endpoint); #if defined(c_plusplus) || defined(__cplusplus) diff --git a/ompi/mca/btl/udapl/btl_udapl_frag.h b/ompi/mca/btl/udapl/btl_udapl_frag.h index 57e5c885ab..b65545751e 100644 --- a/ompi/mca/btl/udapl/btl_udapl_frag.h +++ b/ompi/mca/btl/udapl/btl_udapl_frag.h @@ -35,7 +35,9 @@ typedef enum { MCA_BTL_UDAPL_SEND, MCA_BTL_UDAPL_RECV, MCA_BTL_UDAPL_PUT, - MCA_BTL_UDAPL_GET + MCA_BTL_UDAPL_GET, + MCA_BTL_UDAPL_CONN_RECV, + MCA_BTL_UDAPL_CONN_SEND } mca_btl_udapl_frag_type_t;