1
1

Change so that we only check the connection queue when expecting a connection; create an MCA parameter that controls the frequency at which the async queue is checked.

This commit was SVN r14511.
Этот коммит содержится в:
Donald Kerr 2007-04-25 17:46:25 +00:00
родитель 7d0f51e6b9
Коммит 80d984441f
5 изменённых файлов: 42 добавлений и 29 удалений

Просмотреть файл

@ -313,6 +313,10 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
btl->udapl_eager_rdma_endpoint_count = 0;
OBJ_CONSTRUCT(&btl->udapl_eager_rdma_lock, opal_mutex_t);
/* initialize miscellaneous variables */
btl->udapl_async_events = 0;
btl->udapl_connect_inprogress = 0;
/* TODO - Set up SRQ when it is supported */
return OMPI_SUCCESS;

Просмотреть файл

@ -89,7 +89,7 @@ struct mca_btl_udapl_component_t {
int32_t udapl_eager_rdma_win; /**< number of eager RDMA fragments
received before returning credits to
sender */
int32_t udapl_async_events; /**< dequeue asynchronous events */
opal_list_t udapl_procs; /**< list of udapl proc structures */
opal_mutex_t udapl_lock; /**< lock for accessing module state */
char* udapl_mpool_name; /**< name of memory pool */
@ -136,6 +136,9 @@ struct mca_btl_udapl_module_t {
* with eager rdma
* connections
*/
int32_t udapl_async_events;
int32_t udapl_connect_inprogress;
/* module specific limits */
int udapl_evd_qlen;
int udapl_max_request_dtos; /**< maximum number of outstanding consumer

Просмотреть файл

@ -592,9 +592,6 @@ int mca_btl_udapl_component_progress()
mca_btl_udapl_module_t* btl;
static int32_t inprogress = 0;
DAT_EVENT event;
#if defined(__SVR4) && defined(__sun)
DAT_COUNT nmore; /* used by dat_evd_wait, see comment below */
#endif
size_t i;
int32_t j, rdma_ep_count;
int count = 0;
@ -809,17 +806,9 @@ int mca_btl_udapl_component_progress()
}
/* Check connection EVD */
while(DAT_SUCCESS ==
#if defined(__SVR4) && defined(__sun)
/* There is a bug in the Solaris uDAPL implementation
* such that dat_evd_dequeue does not dequeue
* DAT_CONNECTION_REQUEST_EVENT. Workaround is to use
* wait. This should be removed when fix available.
*/
dat_evd_wait(btl->udapl_evd_conn, 0, 1, &event, &nmore)) {
#else
dat_evd_dequeue(btl->udapl_evd_conn, &event)) {
#endif
while((btl->udapl_connect_inprogress > 0) && (DAT_SUCCESS ==
dat_evd_dequeue(btl->udapl_evd_conn, &event))) {
switch(event.event_number) {
case DAT_CONNECTION_REQUEST_EVENT:
/* Accept a new connection */
@ -857,22 +846,28 @@ int mca_btl_udapl_component_progress()
}
/* Check async EVD */
while(DAT_SUCCESS ==
if (btl->udapl_async_events == mca_btl_udapl_component.udapl_async_events) {
btl->udapl_async_events = 0;
while(DAT_SUCCESS ==
dat_evd_dequeue(btl->udapl_evd_async, &event)) {
switch(event.event_number) {
case DAT_ASYNC_ERROR_EVD_OVERFLOW:
case DAT_ASYNC_ERROR_IA_CATASTROPHIC:
case DAT_ASYNC_ERROR_EP_BROKEN:
case DAT_ASYNC_ERROR_TIMED_OUT:
case DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR:
BTL_OUTPUT(("WARNING: async event ignored : %d",
event.event_number));
break;
default:
BTL_OUTPUT(("WARNING unknown async event: %d\n",
event.event_number));
switch(event.event_number) {
case DAT_ASYNC_ERROR_EVD_OVERFLOW:
case DAT_ASYNC_ERROR_IA_CATASTROPHIC:
case DAT_ASYNC_ERROR_EP_BROKEN:
case DAT_ASYNC_ERROR_TIMED_OUT:
case DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR:
BTL_OUTPUT(("WARNING: async event ignored : %d",
event.event_number));
break;
default:
BTL_OUTPUT(("WARNING unknown async event: %d\n",
event.event_number));
}
}
} else {
btl->udapl_async_events++;
}
/*

Просмотреть файл

@ -495,6 +495,8 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
return ORTE_ERR_OUT_OF_RESOURCE;
}
OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), 1);
/* Pack our address information */
rc = orte_dss.pack(buf, &addr->port, 1, ORTE_UINT64);
if(ORTE_SUCCESS != rc) {
@ -588,7 +590,8 @@ void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint)
int rc;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
OPAL_THREAD_ADD32(&(btl->udapl_connect_inprogress), 1);
/* Nasty test to prevent deadlock and unwanted connection attempts */
/* This right here is the whole point of using the ORTE/RML handshake */
if((MCA_BTL_UDAPL_CONN_EAGER == endpoint->endpoint_state &&
@ -766,6 +769,7 @@ static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t* endpoint)
int rc;
endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTED;
OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), -1);
/* post eager/max recv buffers */
mca_btl_udapl_endpoint_post_recv(endpoint,

Просмотреть файл

@ -208,6 +208,13 @@ int mca_btl_udapl_register_mca_params(void)
&mca_btl_udapl_component.udapl_eager_rdma_guarantee,
REGINT_GE_ZERO), tmp_rc, rc);
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_events",
"The asynchronous event queue will only be "
"checked after entering progress this number of times.",
100000000,
&mca_btl_udapl_component.udapl_async_events,
REGINT_GE_ONE), tmp_rc, rc);
/* register uDAPL module parameters */
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("evd_qlen",
"The event dispatcher queue length.",