1
1

Check the connection queue only when a connection is expected; add an MCA parameter that controls how often the async event queue is checked.

This commit was SVN r14511.
Этот коммит содержится в:
Donald Kerr 2007-04-25 17:46:25 +00:00
родитель 7d0f51e6b9
Коммит 80d984441f
5 изменённых файлов: 42 добавлений и 29 удалений

Просмотреть файл

@ -313,6 +313,10 @@ mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl)
btl->udapl_eager_rdma_endpoint_count = 0; btl->udapl_eager_rdma_endpoint_count = 0;
OBJ_CONSTRUCT(&btl->udapl_eager_rdma_lock, opal_mutex_t); OBJ_CONSTRUCT(&btl->udapl_eager_rdma_lock, opal_mutex_t);
/* initialize miscellaneous variables */
btl->udapl_async_events = 0;
btl->udapl_connect_inprogress = 0;
/* TODO - Set up SRQ when it is supported */ /* TODO - Set up SRQ when it is supported */
return OMPI_SUCCESS; return OMPI_SUCCESS;

Просмотреть файл

@ -89,7 +89,7 @@ struct mca_btl_udapl_component_t {
int32_t udapl_eager_rdma_win; /**< number of eager RDMA fragments int32_t udapl_eager_rdma_win; /**< number of eager RDMA fragments
received before returning credits to received before returning credits to
sender */ sender */
int32_t udapl_async_events; /**< dequeue asynchronous events */
opal_list_t udapl_procs; /**< list of udapl proc structures */ opal_list_t udapl_procs; /**< list of udapl proc structures */
opal_mutex_t udapl_lock; /**< lock for accessing module state */ opal_mutex_t udapl_lock; /**< lock for accessing module state */
char* udapl_mpool_name; /**< name of memory pool */ char* udapl_mpool_name; /**< name of memory pool */
@ -136,6 +136,9 @@ struct mca_btl_udapl_module_t {
* with eager rdma * with eager rdma
* connections * connections
*/ */
int32_t udapl_async_events;
int32_t udapl_connect_inprogress;
/* module specific limits */ /* module specific limits */
int udapl_evd_qlen; int udapl_evd_qlen;
int udapl_max_request_dtos; /**< maximum number of outstanding consumer int udapl_max_request_dtos; /**< maximum number of outstanding consumer

Просмотреть файл

@ -592,9 +592,6 @@ int mca_btl_udapl_component_progress()
mca_btl_udapl_module_t* btl; mca_btl_udapl_module_t* btl;
static int32_t inprogress = 0; static int32_t inprogress = 0;
DAT_EVENT event; DAT_EVENT event;
#if defined(__SVR4) && defined(__sun)
DAT_COUNT nmore; /* used by dat_evd_wait, see comment below */
#endif
size_t i; size_t i;
int32_t j, rdma_ep_count; int32_t j, rdma_ep_count;
int count = 0; int count = 0;
@ -809,17 +806,9 @@ int mca_btl_udapl_component_progress()
} }
/* Check connection EVD */ /* Check connection EVD */
while(DAT_SUCCESS == while((btl->udapl_connect_inprogress > 0) && (DAT_SUCCESS ==
#if defined(__SVR4) && defined(__sun) dat_evd_dequeue(btl->udapl_evd_conn, &event))) {
/* There is a bug in the Solaris uDAPL implementation
* such that dat_evd_dequeue does not dequeue
* DAT_CONNECTION_REQUEST_EVENT. Workaround is to use
* wait. This should be removed when fix available.
*/
dat_evd_wait(btl->udapl_evd_conn, 0, 1, &event, &nmore)) {
#else
dat_evd_dequeue(btl->udapl_evd_conn, &event)) {
#endif
switch(event.event_number) { switch(event.event_number) {
case DAT_CONNECTION_REQUEST_EVENT: case DAT_CONNECTION_REQUEST_EVENT:
/* Accept a new connection */ /* Accept a new connection */
@ -857,22 +846,28 @@ int mca_btl_udapl_component_progress()
} }
/* Check async EVD */ /* Check async EVD */
while(DAT_SUCCESS == if (btl->udapl_async_events == mca_btl_udapl_component.udapl_async_events) {
btl->udapl_async_events = 0;
while(DAT_SUCCESS ==
dat_evd_dequeue(btl->udapl_evd_async, &event)) { dat_evd_dequeue(btl->udapl_evd_async, &event)) {
switch(event.event_number) { switch(event.event_number) {
case DAT_ASYNC_ERROR_EVD_OVERFLOW: case DAT_ASYNC_ERROR_EVD_OVERFLOW:
case DAT_ASYNC_ERROR_IA_CATASTROPHIC: case DAT_ASYNC_ERROR_IA_CATASTROPHIC:
case DAT_ASYNC_ERROR_EP_BROKEN: case DAT_ASYNC_ERROR_EP_BROKEN:
case DAT_ASYNC_ERROR_TIMED_OUT: case DAT_ASYNC_ERROR_TIMED_OUT:
case DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR: case DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR:
BTL_OUTPUT(("WARNING: async event ignored : %d", BTL_OUTPUT(("WARNING: async event ignored : %d",
event.event_number)); event.event_number));
break; break;
default: default:
BTL_OUTPUT(("WARNING unknown async event: %d\n", BTL_OUTPUT(("WARNING unknown async event: %d\n",
event.event_number)); event.event_number));
}
} }
} else {
btl->udapl_async_events++;
} }
/* /*

Просмотреть файл

@ -495,6 +495,8 @@ static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint)
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), 1);
/* Pack our address information */ /* Pack our address information */
rc = orte_dss.pack(buf, &addr->port, 1, ORTE_UINT64); rc = orte_dss.pack(buf, &addr->port, 1, ORTE_UINT64);
if(ORTE_SUCCESS != rc) { if(ORTE_SUCCESS != rc) {
@ -588,7 +590,8 @@ void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint)
int rc; int rc;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock); OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
OPAL_THREAD_ADD32(&(btl->udapl_connect_inprogress), 1);
/* Nasty test to prevent deadlock and unwanted connection attempts */ /* Nasty test to prevent deadlock and unwanted connection attempts */
/* This right here is the whole point of using the ORTE/RML handshake */ /* This right here is the whole point of using the ORTE/RML handshake */
if((MCA_BTL_UDAPL_CONN_EAGER == endpoint->endpoint_state && if((MCA_BTL_UDAPL_CONN_EAGER == endpoint->endpoint_state &&
@ -766,6 +769,7 @@ static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t* endpoint)
int rc; int rc;
endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTED; endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTED;
OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), -1);
/* post eager/max recv buffers */ /* post eager/max recv buffers */
mca_btl_udapl_endpoint_post_recv(endpoint, mca_btl_udapl_endpoint_post_recv(endpoint,

Просмотреть файл

@ -208,6 +208,13 @@ int mca_btl_udapl_register_mca_params(void)
&mca_btl_udapl_component.udapl_eager_rdma_guarantee, &mca_btl_udapl_component.udapl_eager_rdma_guarantee,
REGINT_GE_ZERO), tmp_rc, rc); REGINT_GE_ZERO), tmp_rc, rc);
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_events",
"The asynchronous event queue will only be "
"checked after entering progress this number of times.",
100000000,
&mca_btl_udapl_component.udapl_async_events,
REGINT_GE_ONE), tmp_rc, rc);
/* register uDAPL module parameters */ /* register uDAPL module parameters */
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("evd_qlen", CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("evd_qlen",
"The event dispatcher queue length.", "The event dispatcher queue length.",