1
1

btl/ugni: move connection check into sync event

This commit makes datagram checks time based and reduces their
frequency when only the wildcard datagram is posted. This change
improves latency on knl systems.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2017-03-14 10:10:05 -06:00
родитель 330b11c8ab
Коммит 2e42b0afbd
5 изменённых файлов: 33 добавлений и 8 удалений

Просмотреть файл

@ -53,6 +53,9 @@
/** number of rdma completion queue items to remove per progress loop */
#define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 16
/** interval between checks for connection requests (in microseconds) */
#define MCA_BTL_UGNI_CONNECT_USEC 10
/**
* Modex data
*/
@ -167,6 +170,9 @@ typedef struct mca_btl_ugni_module_t {
gni_ep_handle_t wildcard_ep;
struct mca_btl_base_endpoint_t *local_ep;
volatile int32_t active_datagrams;
opal_event_t connection_event;
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
gni_cq_handle_t smsg_remote_cq;
@ -422,6 +428,7 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
int mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device);
mca_btl_base_descriptor_t *
mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,

Просмотреть файл

@ -156,6 +156,8 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
mca_btl_ugni_spawn_progress_thread(btl);
}
opal_event_evtimer_add (&ugni_module->connection_event, (&(struct timeval) {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC}));
ugni_module->initialized = true;
}

Просмотреть файл

@ -499,9 +499,9 @@ mca_btl_ugni_component_init (int *num_btl_modules,
return base_modules;
}
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
mca_btl_base_endpoint_t *ep;
gni_ep_handle_t handle;
int count = 0, rc;
@ -542,6 +542,7 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
ep->dg_posted = false;
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, -1);
}
(void) mca_btl_ugni_ep_connect_progress (ep);
@ -705,16 +706,12 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
static int mca_btl_ugni_component_progress (void)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
static volatile int32_t call_count = 0;
int32_t current_call;
int count = 0;
current_call = OPAL_THREAD_ADD32(&call_count, 1);
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
if ((current_call & 0x7) == 0) {
count += mca_btl_ugni_progress_datagram (ugni_module, ugni_module->devices);
if (ugni_module->active_datagrams) {
count += mca_btl_ugni_progress_datagram (ugni_module->devices);
}
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {

Просмотреть файл

@ -311,6 +311,9 @@ static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
rc = GNI_EpPostDataWId (ep->smsg_ep_handle->gni_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
ep->remote_attr, sizeof (*ep->remote_attr),
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, 1);
}
return mca_btl_rc_ugni_to_opal (rc);
}

Просмотреть файл

@ -61,6 +61,16 @@ mca_btl_ugni_module_t mca_btl_ugni_module = {
}
};
/**
 * Timer callback: progress pending datagrams, then re-arm the timer.
 *
 * The callback returns nothing. It was previously declared to return
 * `void *` without a return statement, which does not match the event
 * callback type it is registered with.
 *
 * @param foo  unused (required by the event callback signature)
 * @param bar  unused (required by the event callback signature)
 * @param arg  the mca_btl_ugni_module_t that was registered with the event
 */
static void mca_btl_ugni_datagram_event (int foo, short bar, void *arg)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) arg;
    mca_btl_ugni_device_t *device = ugni_module->devices;

    (void) foo;
    (void) bar;

    /* the completion count is not needed here */
    (void) mca_btl_ugni_progress_datagram (device);

    /* schedule the next connection check */
    opal_event_evtimer_add (&ugni_module->connection_event, (&(struct timeval) {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC}));
}
int
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module)
{
@ -74,6 +84,10 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module)
ugni_module->initialized = false;
ugni_module->nlocal_procs = 0;
ugni_module->connected_peer_count = 0;
ugni_module->active_datagrams = 0;
opal_event_evtimer_set (opal_sync_event_base, &ugni_module->connection_event,
mca_btl_ugni_datagram_event, ugni_module);
OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->failed_frags_lock, opal_mutex_t);
@ -170,6 +184,8 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni error destroying endpoint - %s",gni_err_str[rc]));
}
opal_event_del (&ugni_module->connection_event);
}
for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {