1
1

Merge pull request #3169 from hjelmn/btl_ugni_2_0

More btl/ugni updates
Этот коммит содержится в:
Nathan Hjelm 2017-03-14 13:23:13 -06:00 коммит произвёл GitHub
родитель 37214eda09 6b210fa2c4
Коммит db9232f8d6
6 изменённых файлов: 41 добавлений и 10 удалений

Просмотреть файл

@ -53,6 +53,9 @@
/** number of rdma completion queue items to remove per progress loop */
#define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 16
/** how often to check for connection requests (connection timer timeout, in microseconds) */
#define MCA_BTL_UGNI_CONNECT_USEC 10
/**
* Modex data
*/
@ -167,6 +170,9 @@ typedef struct mca_btl_ugni_module_t {
gni_ep_handle_t wildcard_ep;
struct mca_btl_base_endpoint_t *local_ep;
volatile int32_t active_datagrams;
opal_event_t connection_event;
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
gni_cq_handle_t smsg_remote_cq;
@ -422,6 +428,7 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
int mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device);
mca_btl_base_descriptor_t *
mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,

Просмотреть файл

@ -156,6 +156,8 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
mca_btl_ugni_spawn_progress_thread(btl);
}
opal_event_evtimer_add (&ugni_module->connection_event, (&(struct timeval) {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC}));
ugni_module->initialized = true;
}

Просмотреть файл

@ -499,9 +499,9 @@ mca_btl_ugni_component_init (int *num_btl_modules,
return base_modules;
}
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
mca_btl_base_endpoint_t *ep;
gni_ep_handle_t handle;
int count = 0, rc;
@ -542,6 +542,7 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
ep->dg_posted = false;
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, -1);
}
(void) mca_btl_ugni_ep_connect_progress (ep);
@ -705,16 +706,12 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
static int mca_btl_ugni_component_progress (void)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
static volatile int32_t call_count = 0;
int32_t current_call;
int count = 0;
current_call = OPAL_THREAD_ADD32(&call_count, 1);
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
if ((current_call & 0x7) == 0) {
count += mca_btl_ugni_progress_datagram (ugni_module, ugni_module->devices);
if (ugni_module->active_datagrams) {
count += mca_btl_ugni_progress_datagram (ugni_module->devices);
}
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {

Просмотреть файл

@ -311,6 +311,9 @@ static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
rc = GNI_EpPostDataWId (ep->smsg_ep_handle->gni_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
ep->remote_attr, sizeof (*ep->remote_attr),
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
(void) opal_atomic_add_32 (&ugni_module->active_datagrams, 1);
}
return mca_btl_rc_ugni_to_opal (rc);
}

Просмотреть файл

@ -61,6 +61,16 @@ mca_btl_ugni_module_t mca_btl_ugni_module = {
}
};
/**
 * Timer callback that progresses datagrams and then re-arms its own timer.
 *
 * This function is installed as the callback of the module's
 * connection_event. Every time the timer fires it calls
 * mca_btl_ugni_progress_datagram() with the module's devices pointer and
 * re-adds connection_event with a timeout of MCA_BTL_UGNI_CONNECT_USEC
 * microseconds, so the callback keeps firing until the event is deleted.
 *
 * The function returns void: event callbacks do not produce a value, and the
 * previous `void *` declaration fell off the end without returning anything
 * and did not match the callback type expected by the event API.
 *
 * @param foo  unused (required by the event callback signature)
 * @param bar  unused (required by the event callback signature)
 * @param arg  the mca_btl_ugni_module_t that owns connection_event
 */
static void mca_btl_ugni_datagram_event (int foo, short bar, void *arg)
{
    mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) arg;
    mca_btl_ugni_device_t *device = ugni_module->devices;
    /* named timeout instead of an inline compound literal, for readability */
    struct timeval tv = {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC};

    (void) foo;
    (void) bar;

    /* the number of progressed datagrams is not needed here */
    (void) mca_btl_ugni_progress_datagram (device);

    /* re-arm the timer so datagram progress keeps running periodically */
    opal_event_evtimer_add (&ugni_module->connection_event, &tv);
}
int
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module)
{
@ -74,6 +84,10 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module)
ugni_module->initialized = false;
ugni_module->nlocal_procs = 0;
ugni_module->connected_peer_count = 0;
ugni_module->active_datagrams = 0;
opal_event_evtimer_set (opal_sync_event_base, &ugni_module->connection_event,
mca_btl_ugni_datagram_event, ugni_module);
OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->failed_frags_lock, opal_mutex_t);
@ -170,6 +184,8 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni error destroying endpoint - %s",gni_err_str[rc]));
}
opal_event_del (&ugni_module->connection_event);
}
for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {

Просмотреть файл

@ -118,6 +118,13 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
size_t packed_size = payload_size;
int rc;
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list))) {
if (NULL != descriptor) {
*descriptor = NULL;
}
return OPAL_ERR_OUT_OF_RESOURCE;
}
do {
BTL_VERBOSE(("btl/ugni isend sending fragment from %d -> %d. length = %" PRIu64
" endoint state %d", OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid,
@ -134,8 +141,7 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
}
assert (packed_size == payload_size);
if (OPAL_UNLIKELY(NULL == frag || OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint) ||
opal_list_get_size (&endpoint->frag_wait_list))) {
if (OPAL_UNLIKELY(NULL == frag || OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint))) {
break;
}