Drain the pipe from the service thread to the main thread during shutdown. By this
point, the event engine has been shut down until BTL finalization is done, so calling opal_progress in the wait loop is not an option; we have to drain from inside the BTL. Clean up the looping structure for the finalize routine. Update copyrights. This commit was SVN r21620.
Этот коммит содержится в:
родитель
ac34b1de69
Коммит
2f3c0b4fcf
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -670,6 +671,33 @@ int ompi_btl_openib_fd_run_in_main(ompi_btl_openib_fd_main_callback_fn_t *callba
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ompi_btl_openib_fd_main_thread_drain(void)
|
||||
{
|
||||
int nfds, ret;
|
||||
fd_set rfds;
|
||||
struct timeval tv;
|
||||
|
||||
while (1) {
|
||||
FD_ZERO(&rfds);
|
||||
FD_SET(pipe_to_main_thread[0], &rfds);
|
||||
nfds = pipe_to_main_thread[0] + 1;
|
||||
|
||||
tv.tv_sec = 0;
|
||||
tv.tv_usec = 0;
|
||||
|
||||
ret = select(nfds, &rfds, NULL, NULL, &tv);
|
||||
if (ret > 0) {
|
||||
main_thread_event_callback(pipe_to_main_thread[0], 0, NULL);
|
||||
return 0;
|
||||
} else {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Finalize
|
||||
* Called by main thread
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -62,6 +63,13 @@ int ompi_btl_openib_fd_run_in_service(ompi_btl_openib_fd_main_callback_fn_t call
|
||||
int ompi_btl_openib_fd_run_in_main(ompi_btl_openib_fd_main_callback_fn_t callback,
|
||||
void *context);
|
||||
|
||||
/**
|
||||
* Check the main thread's pipe (without blocking) and handle a pending message, if any; call repeatedly to drain the pipe.
|
||||
* Likely only useful during finalize, when the event library
|
||||
* won't fire callbacks.
|
||||
*/
|
||||
int ompi_btl_openib_fd_main_thread_drain(void);
|
||||
|
||||
/**
|
||||
* Finalize fd monitoring.
|
||||
* Called by the main thread.
|
||||
|
@ -2,6 +2,7 @@
|
||||
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2008 Chelsio, Inc. All rights reserved.
|
||||
* Copyright (c) 2008 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -977,7 +978,7 @@ static void *call_disconnect_callback(void *v)
|
||||
OBJ_RELEASE(context);
|
||||
|
||||
/* Tell the main thread that we're done */
|
||||
++disconnect_callbacks;
|
||||
opal_atomic_add(&disconnect_callbacks, 1);
|
||||
OPAL_OUTPUT((-1, "SERVICE Service thread disconnect on ID %p done; count=%d",
|
||||
(void*) tmp, disconnect_callbacks));
|
||||
return NULL;
|
||||
@ -1020,9 +1021,8 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
|
||||
rdmacm_contents_t *contents = (rdmacm_contents_t *) item;
|
||||
|
||||
if (endpoint == contents->endpoint) {
|
||||
for (item2 = opal_list_remove_first(&(contents->ids));
|
||||
NULL != item2;
|
||||
item2 = opal_list_remove_first(&(contents->ids))) {
|
||||
while (NULL !=
|
||||
(item2 = opal_list_remove_first(&(contents->ids)))) {
|
||||
/* Fun race condition: we cannot call
|
||||
rdma_disconnect() here in the main thread, because
|
||||
if we do, there is a nonzero chance that the
|
||||
@ -1039,7 +1039,9 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
|
||||
ompi_btl_openib_fd_run_in_service(call_disconnect_callback,
|
||||
item2);
|
||||
}
|
||||
opal_list_remove_item(&client_list, item);
|
||||
/* remove_item returns the item before the item removed,
|
||||
meaning that the for loop is still safe */
|
||||
item = opal_list_remove_item(&client_list, item);
|
||||
contents->on_client_list = false;
|
||||
break;
|
||||
}
|
||||
@ -1051,6 +1053,7 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
|
||||
|
||||
/* Now wait for all the disconnect callbacks to occur */
|
||||
while (num_to_wait_for != disconnect_callbacks) {
|
||||
ompi_btl_openib_fd_main_thread_drain();
|
||||
sched_yield();
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user