Drain the pipe from the service thread to the main thread during shutdown. By this
point, the event engine has been shut down until BTL finalization is done, so calling opal_progress in the wait loop is not an option -- we have to drain from inside the BTL. Clean up the looping structure for the finalize routine. Update copyrights. This commit was SVN r21620.
Этот коммит содержится в:
родитель
ac34b1de69
Коммит
2f3c0b4fcf
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -670,6 +671,33 @@ int ompi_btl_openib_fd_run_in_main(ompi_btl_openib_fd_main_callback_fn_t *callba
|
|||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
ompi_btl_openib_fd_main_thread_drain(void)
|
||||||
|
{
|
||||||
|
int nfds, ret;
|
||||||
|
fd_set rfds;
|
||||||
|
struct timeval tv;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
FD_ZERO(&rfds);
|
||||||
|
FD_SET(pipe_to_main_thread[0], &rfds);
|
||||||
|
nfds = pipe_to_main_thread[0] + 1;
|
||||||
|
|
||||||
|
tv.tv_sec = 0;
|
||||||
|
tv.tv_usec = 0;
|
||||||
|
|
||||||
|
ret = select(nfds, &rfds, NULL, NULL, &tv);
|
||||||
|
if (ret > 0) {
|
||||||
|
main_thread_event_callback(pipe_to_main_thread[0], 0, NULL);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Finalize
|
* Finalize
|
||||||
* Called by main thread
|
* Called by main thread
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -62,6 +63,13 @@ int ompi_btl_openib_fd_run_in_service(ompi_btl_openib_fd_main_callback_fn_t call
|
|||||||
int ompi_btl_openib_fd_run_in_main(ompi_btl_openib_fd_main_callback_fn_t callback,
|
int ompi_btl_openib_fd_run_in_main(ompi_btl_openib_fd_main_callback_fn_t callback,
|
||||||
void *context);
|
void *context);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Drain all pending messages from the main thread's pipe.
|
||||||
|
* Likely only useful during finalize, when the event library
|
||||||
|
* won't fire callbacks.
|
||||||
|
*/
|
||||||
|
int ompi_btl_openib_fd_main_thread_drain(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finalize fd monitoring.
|
* Finalize fd monitoring.
|
||||||
* Called by the main thread.
|
* Called by the main thread.
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2007-2008 Chelsio, Inc. All rights reserved.
|
* Copyright (c) 2007-2008 Chelsio, Inc. All rights reserved.
|
||||||
* Copyright (c) 2008 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2008 Mellanox Technologies. All rights reserved.
|
||||||
|
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||||
*
|
*
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -977,7 +978,7 @@ static void *call_disconnect_callback(void *v)
|
|||||||
OBJ_RELEASE(context);
|
OBJ_RELEASE(context);
|
||||||
|
|
||||||
/* Tell the main thread that we're done */
|
/* Tell the main thread that we're done */
|
||||||
++disconnect_callbacks;
|
opal_atomic_add(&disconnect_callbacks, 1);
|
||||||
OPAL_OUTPUT((-1, "SERVICE Service thread disconnect on ID %p done; count=%d",
|
OPAL_OUTPUT((-1, "SERVICE Service thread disconnect on ID %p done; count=%d",
|
||||||
(void*) tmp, disconnect_callbacks));
|
(void*) tmp, disconnect_callbacks));
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -1020,9 +1021,8 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
|
|||||||
rdmacm_contents_t *contents = (rdmacm_contents_t *) item;
|
rdmacm_contents_t *contents = (rdmacm_contents_t *) item;
|
||||||
|
|
||||||
if (endpoint == contents->endpoint) {
|
if (endpoint == contents->endpoint) {
|
||||||
for (item2 = opal_list_remove_first(&(contents->ids));
|
while (NULL !=
|
||||||
NULL != item2;
|
(item2 = opal_list_remove_first(&(contents->ids)))) {
|
||||||
item2 = opal_list_remove_first(&(contents->ids))) {
|
|
||||||
/* Fun race condition: we cannot call
|
/* Fun race condition: we cannot call
|
||||||
rdma_disconnect() here in the main thread, because
|
rdma_disconnect() here in the main thread, because
|
||||||
if we do, there is a nonzero chance that the
|
if we do, there is a nonzero chance that the
|
||||||
@ -1039,7 +1039,9 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
|
|||||||
ompi_btl_openib_fd_run_in_service(call_disconnect_callback,
|
ompi_btl_openib_fd_run_in_service(call_disconnect_callback,
|
||||||
item2);
|
item2);
|
||||||
}
|
}
|
||||||
opal_list_remove_item(&client_list, item);
|
/* remove_item returns the item before the item removed,
|
||||||
|
meaning that the for list is still safe */
|
||||||
|
item = opal_list_remove_item(&client_list, item);
|
||||||
contents->on_client_list = false;
|
contents->on_client_list = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1051,6 +1053,7 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
|
|||||||
|
|
||||||
/* Now wait for all the disconnect callbacks to occur */
|
/* Now wait for all the disconnect callbacks to occur */
|
||||||
while (num_to_wait_for != disconnect_callbacks) {
|
while (num_to_wait_for != disconnect_callbacks) {
|
||||||
|
ompi_btl_openib_fd_main_thread_drain();
|
||||||
sched_yield();
|
sched_yield();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user