Add a backoff mechanism for re-establishing communication
This commit was SVN r26366.
Этот коммит содержится в:
родитель
74ade8b181
Коммит
0ae2277796
@ -44,8 +44,6 @@ ompi_mtl_portals4_flowctl_init(void)
|
||||
OBJ_CLASS(ompi_mtl_portals4_pending_request_t),
|
||||
1, -1, 1);
|
||||
|
||||
OBJ_CONSTRUCT(&ompi_mtl_portals4.flowctl.mutex, opal_mutex_t);
|
||||
|
||||
ompi_mtl_portals4.flowctl.slots = (ompi_mtl_portals4.queue_size - 3) / 3;
|
||||
|
||||
ompi_mtl_portals4.flowctl.alert_req.type = portals4_req_flowctl;
|
||||
@ -200,6 +198,9 @@ ompi_mtl_portals4_flowctl_init(void)
|
||||
|
||||
ompi_mtl_portals4.flowctl.num_children = 0;
|
||||
|
||||
gettimeofday(&ompi_mtl_portals4.flowctl.tv, NULL);
|
||||
ompi_mtl_portals4.flowctl.backoff_count = 0;
|
||||
|
||||
ret = OMPI_SUCCESS;
|
||||
|
||||
error:
|
||||
@ -544,6 +545,7 @@ flowctl_fanout_callback(ptl_event_t *ev,
|
||||
ompi_mtl_portals4_base_request_t *ptl_base_request)
|
||||
{
|
||||
int ret;
|
||||
struct timeval tv;
|
||||
|
||||
ompi_mtl_portals4.flowctl.flowctl_active = false;
|
||||
ret = PtlPTEnable(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
|
||||
@ -554,6 +556,16 @@ flowctl_fanout_callback(ptl_event_t *ev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
gettimeofday(&tv, NULL);
|
||||
if (((tv.tv_sec * 1000000 + tv.tv_usec) -
|
||||
(ompi_mtl_portals4.flowctl.tv.tv_sec * 1000000 + ompi_mtl_portals4.flowctl.tv.tv_usec))
|
||||
< 1000000 * ompi_mtl_portals4.flowctl.backoff_count) {
|
||||
usleep(++ompi_mtl_portals4.flowctl.backoff_count);
|
||||
} else {
|
||||
ompi_mtl_portals4.flowctl.backoff_count = 0;
|
||||
}
|
||||
ompi_mtl_portals4.flowctl.tv = tv;
|
||||
|
||||
ompi_mtl_portals4_pending_list_progress();
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||
|
@ -38,7 +38,6 @@ struct ompi_mtl_portals4_flowctl_t {
|
||||
opal_list_t active_sends;
|
||||
opal_list_t pending_sends;
|
||||
opal_free_list_t pending_fl;
|
||||
opal_mutex_t mutex;
|
||||
int32_t slots;
|
||||
|
||||
ompi_mtl_portals4_base_request_t alert_req;
|
||||
@ -70,6 +69,10 @@ struct ompi_mtl_portals4_flowctl_t {
|
||||
/** Flow control restart fan-out ME. */
|
||||
ptl_handle_me_t fanout_me_h;
|
||||
|
||||
/** last restart time */
|
||||
struct timeval tv;
|
||||
int backoff_count;
|
||||
|
||||
size_t num_procs;
|
||||
size_t num_children;
|
||||
ptl_process_t children[2];
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user