Give up on dat_ep_query() and instead manually send our address information across the wire after connection establishment.
I've introduced a race condition: I'm seeing occasional LOCAL_LENGTH errors on the receive side. I think I'm mixing up eager/max somehow; I will look at it more on Monday. This commit was SVN r10690.
Этот коммит содержится в:
родитель
13f1f4d86e
Коммит
b7e0484c37
@ -385,6 +385,48 @@ static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl,
|
||||
}
|
||||
|
||||
|
||||
static inline int mca_btl_udapl_sendrecv(mca_btl_udapl_module_t* btl,
|
||||
DAT_EP_HANDLE* endpoint)
|
||||
{
|
||||
mca_btl_udapl_frag_t* frag;
|
||||
DAT_DTO_COOKIE cookie;
|
||||
int rc;
|
||||
|
||||
/* Post a receive to get the peer's address data */
|
||||
frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
|
||||
(mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
|
||||
cookie.as_ptr = frag;
|
||||
|
||||
frag->type = MCA_BTL_UDAPL_CONN_RECV;
|
||||
|
||||
rc = dat_ep_post_recv(endpoint, 1,
|
||||
&frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
MCA_BTL_UDAPL_ERROR(rc, "dat_ep_post_recv");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/* Send our local address data over this EP */
|
||||
frag = (mca_btl_udapl_frag_t*)mca_btl_udapl_alloc(
|
||||
(mca_btl_base_module_t*)btl, sizeof(mca_btl_udapl_addr_t));
|
||||
cookie.as_ptr = frag;
|
||||
|
||||
memcpy(frag->segment.seg_addr.pval,
|
||||
&btl->udapl_addr, sizeof(mca_btl_udapl_addr_t));
|
||||
frag->type = MCA_BTL_UDAPL_CONN_SEND;
|
||||
|
||||
rc = dat_ep_post_send(endpoint, 1,
|
||||
&frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
MCA_BTL_UDAPL_ERROR(rc, "dat_ep_post_send");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* uDAPL component progress.
|
||||
*/
|
||||
@ -422,8 +464,8 @@ int mca_btl_udapl_component_progress()
|
||||
/* Was the DTO successful? */
|
||||
if(DAT_DTO_SUCCESS != dto->status) {
|
||||
OPAL_OUTPUT((0,
|
||||
"btl_udapl ***** DTO error %d *****\n",
|
||||
dto->status));
|
||||
"btl_udapl ***** DTO error %d %d %d %p*****\n",
|
||||
dto->status, frag->type, frag->size, dto->ep_handle));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -510,9 +552,6 @@ int mca_btl_udapl_component_progress()
|
||||
/*OPAL_OUTPUT((0, "btl_udapl UDAPL_RECV %d",
|
||||
dto->transfered_length));*/
|
||||
|
||||
/* OPAL_OUTPUT((0, "recv from %s %d %p\n",
|
||||
inet_ntoa(addr->sin_addr), ntohs(addr->sin_port),
|
||||
frag->endpoint));*/
|
||||
frag->segment.seg_addr.pval = frag->hdr + 1;
|
||||
frag->segment.seg_len = dto->transfered_length -
|
||||
sizeof(mca_btl_base_header_t);
|
||||
@ -543,6 +582,17 @@ int mca_btl_udapl_component_progress()
|
||||
|
||||
break;
|
||||
}
|
||||
case MCA_BTL_UDAPL_CONN_RECV:
|
||||
mca_btl_udapl_endpoint_finish_connect(btl,
|
||||
frag->segment.seg_addr.pval,
|
||||
event.event_data.connect_event_data.ep_handle);
|
||||
/* No break - fall through to free */
|
||||
case MCA_BTL_UDAPL_CONN_SEND:
|
||||
frag->segment.seg_len =
|
||||
mca_btl_udapl_module.super.btl_eager_limit;
|
||||
mca_btl_udapl_free((mca_btl_base_module_t*)btl,
|
||||
(mca_btl_base_descriptor_t*)frag);
|
||||
break;
|
||||
default:
|
||||
OPAL_OUTPUT((0, "WARNING unknown frag type: %d\n",
|
||||
frag->type));
|
||||
@ -570,7 +620,7 @@ int mca_btl_udapl_component_progress()
|
||||
/* Both the client and server side of a connection generate
|
||||
this event */
|
||||
|
||||
mca_btl_udapl_endpoint_finish_connect(btl,
|
||||
mca_btl_udapl_sendrecv(btl,
|
||||
event.event_data.connect_event_data.ep_handle);
|
||||
|
||||
count++;
|
||||
|
@ -46,7 +46,7 @@ void mca_btl_udapl_endpoint_recv(int status, orte_process_name_t* endpoint,
|
||||
static int mca_btl_udapl_endpoint_finish_eager(mca_btl_udapl_endpoint_t*);
|
||||
static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t*);
|
||||
|
||||
|
||||
|
||||
int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint,
|
||||
mca_btl_udapl_frag_t* frag)
|
||||
{
|
||||
@ -289,24 +289,14 @@ failure_create:
|
||||
*/
|
||||
|
||||
int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl,
|
||||
mca_btl_udapl_addr_t* addr,
|
||||
DAT_EP_HANDLE endpoint)
|
||||
{
|
||||
mca_btl_udapl_proc_t* proc;
|
||||
mca_btl_base_endpoint_t* ep;
|
||||
DAT_EP_PARAM param;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
/* Query the DAT EP for address information. */
|
||||
/* TODO - refer to the hack comment about setting the port in btl_udapl.c */
|
||||
rc = dat_ep_query(endpoint,
|
||||
DAT_EP_FIELD_REMOTE_IA_ADDRESS_PTR | DAT_EP_FIELD_REMOTE_PORT_QUAL,
|
||||
&param);
|
||||
if(DAT_SUCCESS != rc) {
|
||||
MCA_BTL_UDAPL_ERROR(rc, "dat_ep_query");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Search for the matching BTL EP */
|
||||
OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
|
||||
for(proc = (mca_btl_udapl_proc_t*)
|
||||
@ -321,8 +311,7 @@ int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl,
|
||||
/* Does this endpoint match? */
|
||||
/* TODO - Check that the DAT_CONN_QUAL's match too */
|
||||
if(ep->endpoint_btl == btl &&
|
||||
!memcmp(param.remote_ia_address_ptr,
|
||||
&ep->endpoint_addr.addr, sizeof(DAT_SOCK_ADDR))) {
|
||||
!memcmp(addr, &ep->endpoint_addr, sizeof(DAT_SOCK_ADDR))) {
|
||||
|
||||
OPAL_THREAD_LOCK(&ep->endpoint_lock);
|
||||
if(MCA_BTL_UDAPL_CONN_EAGER == ep->endpoint_state) {
|
||||
|
@ -117,6 +117,7 @@ void mca_btl_udapl_endpoint_post_oob_recv(void);
|
||||
*/
|
||||
|
||||
int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl,
|
||||
mca_btl_udapl_addr_t* addr,
|
||||
DAT_EP_HANDLE endpoint);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
|
@ -35,7 +35,9 @@ typedef enum {
|
||||
MCA_BTL_UDAPL_SEND,
|
||||
MCA_BTL_UDAPL_RECV,
|
||||
MCA_BTL_UDAPL_PUT,
|
||||
MCA_BTL_UDAPL_GET
|
||||
MCA_BTL_UDAPL_GET,
|
||||
MCA_BTL_UDAPL_CONN_RECV,
|
||||
MCA_BTL_UDAPL_CONN_SEND
|
||||
} mca_btl_udapl_frag_type_t;
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user