1
1

Drain the pipe from the service thread to the main thread during shutdown.
By this point, the event engine has been shut down and stays down until btl
finalization is done, so calling opal_progress in the wait loop is not an
option - we have to drain from inside the btl.

Clean up the looping structure for the finalize routine

Update copyrights.

This commit was SVN r21620.
Этот коммит содержится в:
Brian Barrett 2009-07-09 22:13:10 +00:00
родитель ac34b1de69
Коммит 2f3c0b4fcf
3 изменённых файлов: 44 добавлений и 5 удалений

Просмотреть файл

@ -1,5 +1,6 @@
/*
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
*
* $COPYRIGHT$
*
@ -670,6 +671,33 @@ int ompi_btl_openib_fd_run_in_main(ompi_btl_openib_fd_main_callback_fn_t *callba
return OMPI_SUCCESS;
}
/**
 * Poll the main thread's pipe once and service a pending event, if any.
 *
 * Performs a non-blocking select() (zero timeout) on the read end of
 * pipe_to_main_thread.  If that descriptor is readable,
 * main_thread_event_callback() is invoked once on it.  At most one
 * callback is dispatched per call, so a caller that needs several
 * events handled must call this function repeatedly.
 *
 * @return 0 if a callback was dispatched; otherwise the value returned
 *         by select(): 0 when nothing was pending, negative on failure.
 */
int
ompi_btl_openib_fd_main_thread_drain(void)
{
    const int read_fd = pipe_to_main_thread[0];
    struct timeval no_wait;
    fd_set readable;
    int ready;

    /* Zero timeout: check for readability without blocking */
    no_wait.tv_sec = 0;
    no_wait.tv_usec = 0;

    FD_ZERO(&readable);
    FD_SET(read_fd, &readable);

    ready = select(read_fd + 1, &readable, NULL, NULL, &no_wait);
    if (ready <= 0) {
        /* Nothing pending (0) or select() failed (< 0) */
        return ready;
    }

    main_thread_event_callback(read_fd, 0, NULL);
    return 0;
}
/*
* Finalize
* Called by main thread

Просмотреть файл

@ -1,5 +1,6 @@
/*
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
*
* $COPYRIGHT$
*
@ -62,6 +63,13 @@ int ompi_btl_openib_fd_run_in_service(ompi_btl_openib_fd_main_callback_fn_t call
int ompi_btl_openib_fd_run_in_main(ompi_btl_openib_fd_main_callback_fn_t callback,
void *context);
/**
 * Service pending messages on the main thread's pipe without blocking.
 * Likely only useful during finalize, when the event library
 * won't fire callbacks.
 *
 * Each call polls the pipe with a zero timeout and dispatches at most
 * one pending message to the main-thread callback, so callers should
 * invoke it in a loop until their own completion condition is met.
 *
 * @return 0 if a message was dispatched or none was pending;
 *         a negative value if polling the pipe failed.
 */
int ompi_btl_openib_fd_main_thread_drain(void);
/**
* Finalize fd monitoring.
* Called by the main thread.

Просмотреть файл

@ -2,6 +2,7 @@
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Chelsio, Inc. All rights reserved.
* Copyright (c) 2008 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
*
* $COPYRIGHT$
*
@ -977,7 +978,7 @@ static void *call_disconnect_callback(void *v)
OBJ_RELEASE(context);
/* Tell the main thread that we're done */
++disconnect_callbacks;
opal_atomic_add(&disconnect_callbacks, 1);
OPAL_OUTPUT((-1, "SERVICE Service thread disconnect on ID %p done; count=%d",
(void*) tmp, disconnect_callbacks));
return NULL;
@ -1020,9 +1021,8 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
rdmacm_contents_t *contents = (rdmacm_contents_t *) item;
if (endpoint == contents->endpoint) {
for (item2 = opal_list_remove_first(&(contents->ids));
NULL != item2;
item2 = opal_list_remove_first(&(contents->ids))) {
while (NULL !=
(item2 = opal_list_remove_first(&(contents->ids)))) {
/* Fun race condition: we cannot call
rdma_disconnect() here in the main thread, because
if we do, there is a nonzero chance that the
@ -1039,7 +1039,9 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
ompi_btl_openib_fd_run_in_service(call_disconnect_callback,
item2);
}
opal_list_remove_item(&client_list, item);
/* remove_item returns the item before the item removed,
meaning that the for loop is still safe */
item = opal_list_remove_item(&client_list, item);
contents->on_client_list = false;
break;
}
@ -1051,6 +1053,7 @@ static int rdmacm_endpoint_finalize(struct mca_btl_base_endpoint_t *endpoint)
/* Now wait for all the disconnect callbacks to occur */
while (num_to_wait_for != disconnect_callbacks) {
ompi_btl_openib_fd_main_thread_drain();
sched_yield();
}