1
1

btl/ugni: fix another connection race

This commit fixes a race that can occur when two threads are in the
ugni progress function at the same time. The race occurs when one
thread calls GNI_PostDataProbeById and then goes to sleep, and a second
thread calls GNI_PostDataProbeById followed by GNI_EpPostDataWaitById
before the first thread wakes up. If this happens, the first thread will
print a warning from GNI_EpPostDataWaitById about no matching post.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2016-08-08 15:38:11 -06:00
родитель ba77d9beff
Коммит adb668209b

Просмотреть файл

@@ -430,8 +430,8 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
/* check for datagram completion */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return 0;
}
@@ -447,7 +447,6 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
}
/* wait for the incoming datagram to complete (in case it isn't) */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
&remote_addr, &remote_id);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);