Fix for the double iWrite problem Edgar found with ROMIO, plus some other
things I found:
- Locking should prevent it from happening (I think), but there was a race condition in the component progress -- a callback could be triggered that would free the request before it was off the outstanding requests list.
- When pulling a request off the component free list, make sure to reinitialize the free_called state on the IO request. This was what was causing Edgar's failures.
- In the request cleanup code, pull the request out of the per-component free list before returning it to the free list. The old code probably would cause asserts to fire, although it looks like I wrote the loops such that it would have been memory safe if the asserts didn't fire.

Not really sure why I did that, but let's try it again...

This should go to the v1.0 and v1.1 branches.

This commit was SVN r9913.
Этот коммит содержится в:
родитель
33e6d986be
Коммит
f2a6e63d82
@ -135,6 +135,7 @@ int mca_io_base_request_alloc(ompi_file_t *file,
|
||||
if (opal_list_get_size(&file->f_io_requests) > 0) {
|
||||
*req = (mca_io_base_request_t*)
|
||||
opal_list_remove_first(&file->f_io_requests);
|
||||
(*req)->free_called = false;
|
||||
} else {
|
||||
*req = NULL;
|
||||
}
|
||||
@ -221,14 +222,11 @@ void mca_io_base_request_free(ompi_file_t *file,
|
||||
*/
|
||||
void mca_io_base_request_return(ompi_file_t *file)
|
||||
{
|
||||
opal_list_item_t *p, *next;
|
||||
opal_list_item_t *next;
|
||||
|
||||
OPAL_THREAD_LOCK(&file->f_io_requests_lock);
|
||||
for (p = opal_list_get_first(&file->f_io_requests);
|
||||
p != opal_list_get_end(&file->f_io_requests);
|
||||
p = next) {
|
||||
next = opal_list_get_next(p);
|
||||
OMPI_FREE_LIST_RETURN(&mca_io_base_requests, p);
|
||||
while (NULL != (next = opal_list_remove_first(&file->f_io_requests))) {
|
||||
OMPI_FREE_LIST_RETURN(&mca_io_base_requests, next);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
|
||||
}
|
||||
|
@ -279,10 +279,10 @@ static int progress()
|
||||
return ret;
|
||||
} else if (1 == flag) {
|
||||
++count;
|
||||
/* mark as complete (and make sure to wake up any waiters */
|
||||
ompi_request_complete((ompi_request_t*) item);
|
||||
/* we're done, so remove us from the pending list */
|
||||
opal_list_remove_item(&mca_io_romio_pending_requests, item);
|
||||
/* mark as complete (and make sure to wake up any waiters */
|
||||
ompi_request_complete((ompi_request_t*) item);
|
||||
mca_io_base_request_progress_del();
|
||||
/* if the request has been freed already, the user isn't
|
||||
* going to call test or wait on us, so we need to do it
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user