1
1

Give up on dat_ep_query() and instead manually send our address information across the wire after connection establishment.

I've introduced a race condition: I'm seeing occasional LOCAL_LENGTH errors on the receive side. I think I'm mixing up eager/max somehow; I will look at it more on Monday.

This commit was SVN r10690.
Этот коммит содержится в:
Andrew Friedley 2006-07-07 21:48:16 +00:00
родитель 13f1f4d86e
Коммит b7e0484c37
4 изменённых файлов: 63 добавлений и 21 удалений

Просмотреть файл

@ -385,6 +385,48 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl,
}
/*
 * Exchange address information with the peer over a freshly connected EP.
 *
 * First posts a receive for the peer's mca_btl_udapl_addr_t, then posts a
 * send carrying a copy of this module's own address (btl->udapl_addr).
 * The fragments are tagged MCA_BTL_UDAPL_CONN_RECV / MCA_BTL_UDAPL_CONN_SEND;
 * the component progress loop has matching cases that finish the connection
 * and free each fragment once its DTO completes.
 *
 * @param btl       uDAPL BTL module the connection belongs to.
 * @param endpoint  DAT endpoint handle to post the receive and send on.
 *
 * @return OMPI_SUCCESS if both operations were posted; OMPI_ERROR if a
 *         fragment could not be allocated or a DAT post call failed.  A
 *         fragment that was allocated but never successfully posted is
 *         released before returning.  If the send fails, the receive that
 *         was already posted is left outstanding.
 */
static inline int mca_btl_udapl_sendrecv(mca_btl_udapl_module_t* btl,
                                         DAT_EP_HANDLE* endpoint)
{
    mca_btl_udapl_frag_t* frag;
    DAT_DTO_COOKIE cookie;
    int rc;

    /* Post a receive to get the peer's address data */
    frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
            (mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
    if(NULL == frag) {
        /* Nothing to post into; do not dereference a failed allocation */
        return OMPI_ERROR;
    }

    /* The cookie carries the fragment pointer along with the posted DTO */
    cookie.as_ptr = frag;
    frag->type = MCA_BTL_UDAPL_CONN_RECV;

    rc = dat_ep_post_recv(endpoint, 1,
            &frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG);
    if(DAT_SUCCESS != rc) {
        MCA_BTL_UDAPL_ERROR(rc, "dat_ep_post_recv");
        goto failure;
    }

    /* Send our local address data over this EP */
    frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
            (mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
    if(NULL == frag) {
        return OMPI_ERROR;
    }

    cookie.as_ptr = frag;
    memcpy(frag->segment.seg_addr.pval,
            &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t));
    frag->type = MCA_BTL_UDAPL_CONN_SEND;

    rc = dat_ep_post_send(endpoint, 1,
            &frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG);
    if(DAT_SUCCESS != rc) {
        MCA_BTL_UDAPL_ERROR(rc, "dat_ep_post_send");
        goto failure;
    }

    return OMPI_SUCCESS;

failure:
    /* 'frag' is the fragment whose post just failed, so no completion
     * event will ever hand it back to the progress loop.  Return it here
     * using the same sequence the progress loop uses for connection
     * fragments (reset seg_len to the eager limit, then free). */
    frag->segment.seg_len =
        mca_btl_udapl_module.super.btl_eager_limit;
    mca_btl_udapl_free((mca_btl_base_module_t*)btl,
            (mca_btl_base_descriptor_t*)frag);
    return OMPI_ERROR;
}
/*
* uDAPL component progress.
*/
@ -422,8 +464,8 @@ int mca_btl_udapl_component_progress()
/* Was the DTO successful? */
if(DAT_DTO_SUCCESS != dto->status) {
OPAL_OUTPUT((0,
"btl_udapl ***** DTO error %d *****\n",
dto->status));
"btl_udapl ***** DTO error %d %d %d %p*****\n",
dto->status, frag->type, frag->size, dto->ep_handle));
break;
}
@ -510,9 +552,6 @@ int mca_btl_udapl_component_progress()
/*OPAL_OUTPUT((0, "btl_udapl UDAPL_RECV %d",
dto->transfered_length));*/
/* OPAL_OUTPUT((0, "recv from %s %d %p\n",
inet_ntoa(addr->sin_addr), ntohs(addr->sin_port),
frag->endpoint));*/
frag->segment.seg_addr.pval = frag->hdr + 1;
frag->segment.seg_len = dto->transfered_length -
sizeof(mca_btl_base_header_t);
@ -543,6 +582,17 @@ int mca_btl_udapl_component_progress()
break;
}
case MCA_BTL_UDAPL_CONN_RECV:
mca_btl_udapl_endpoint_finish_connect(btl,
frag->segment.seg_addr.pval,
event.event_data.connect_event_data.ep_handle);
/* No break - fall through to free */
case MCA_BTL_UDAPL_CONN_SEND:
frag->segment.seg_len =
mca_btl_udapl_module.super.btl_eager_limit;
mca_btl_udapl_free((mca_btl_base_module_t*)btl,
(mca_btl_base_descriptor_t*)frag);
break;
default:
OPAL_OUTPUT((0, "WARNING unknown frag type: %d\n",
frag->type));
@ -570,7 +620,7 @@ int mca_btl_udapl_component_progress()
/* Both the client and server side of a connection generate
this event */
mca_btl_udapl_endpoint_finish_connect(btl,
mca_btl_udapl_sendrecv(btl,
event.event_data.connect_event_data.ep_handle);
count++;

Просмотреть файл

@ -46,7 +46,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
static int mca_btl_udapl_endpoint_finish_eager(mca_btl_udapl_endpoint_t*);
static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t*);
int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint,
mca_btl_udapl_frag_t* frag)
{
@ -289,24 +289,14 @@ failure_create:
*/
int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl,
mca_btl_udapl_addr_t* addr,
DAT_EP_HANDLE endpoint)
{
mca_btl_udapl_proc_t* proc;
mca_btl_base_endpoint_t* ep;
DAT_EP_PARAM param;
size_t i;
int rc;
/* Query the DAT EP for address information. */
/* TODO - refer to the hack comment about setting the port in btl_udapl.c */
rc = dat_ep_query(endpoint,
DAT_EP_FIELD_REMOTE_IA_ADDRESS_PTR | DAT_EP_FIELD_REMOTE_PORT_QUAL,
&param);
if(DAT_SUCCESS != rc) {
MCA_BTL_UDAPL_ERROR(rc, "dat_ep_query");
return OMPI_ERROR;
}
/* Search for the matching BTL EP */
OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
for(proc = (mca_btl_udapl_proc_t*)
@ -321,8 +311,7 @@ int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl,
/* Does this endpoint match? */
/* TODO - Check that the DAT_CONN_QUAL's match too */
if(ep->endpoint_btl == btl &&
!memcmp(param.remote_ia_address_ptr,
&ep->endpoint_addr.addr, sizeof(DAT_SOCK_ADDR))) {
!memcmp(addr, &ep->endpoint_addr, sizeof(DAT_SOCK_ADDR))) {
OPAL_THREAD_LOCK(&ep->endpoint_lock);
if(MCA_BTL_UDAPL_CONN_EAGER == ep->endpoint_state) {

Просмотреть файл

@ -117,6 +117,7 @@ void mca_btl_udapl_endpoint_post_oob_recv(void);
*/
int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl,
mca_btl_udapl_addr_t* addr,
DAT_EP_HANDLE endpoint);
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -35,7 +35,9 @@ typedef enum {
MCA_BTL_UDAPL_SEND,
MCA_BTL_UDAPL_RECV,
MCA_BTL_UDAPL_PUT,
MCA_BTL_UDAPL_GET
MCA_BTL_UDAPL_GET,
MCA_BTL_UDAPL_CONN_RECV,
MCA_BTL_UDAPL_CONN_SEND
} mca_btl_udapl_frag_type_t;