Fix for the double iWrite problem Edgar found with ROMIO, plus some other things I found:

- Locking should prevent it from happening (I think), but there was a race condition in the component progress function -- a callback could be triggered that would free the request before it was off the outstanding requests list.
- When pulling a request off the component free list, make sure to reinitialize the free_called state on the IO request. This was what was causing Edgar's failures.
- In the request cleanup code, pull the request out of the per-component free list before returning it to the free list. This probably would cause asserts to fire, although it looks like I wrote the loops such that it would have been memory safe if the asserts didn't fire. Not really sure why I did that, but let's try it again...

This should go to the v1.0 and v1.1 branches.

This commit was SVN r9913.
Этот коммит содержится в:
родитель
33e6d986be
Коммит
f2a6e63d82
@ -135,6 +135,7 @@ int mca_io_base_request_alloc(ompi_file_t *file,
|
|||||||
if (opal_list_get_size(&file->f_io_requests) > 0) {
|
if (opal_list_get_size(&file->f_io_requests) > 0) {
|
||||||
*req = (mca_io_base_request_t*)
|
*req = (mca_io_base_request_t*)
|
||||||
opal_list_remove_first(&file->f_io_requests);
|
opal_list_remove_first(&file->f_io_requests);
|
||||||
|
(*req)->free_called = false;
|
||||||
} else {
|
} else {
|
||||||
*req = NULL;
|
*req = NULL;
|
||||||
}
|
}
|
||||||
@ -221,14 +222,11 @@ void mca_io_base_request_free(ompi_file_t *file,
|
|||||||
*/
|
*/
|
||||||
void mca_io_base_request_return(ompi_file_t *file)
|
void mca_io_base_request_return(ompi_file_t *file)
|
||||||
{
|
{
|
||||||
opal_list_item_t *p, *next;
|
opal_list_item_t *next;
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&file->f_io_requests_lock);
|
OPAL_THREAD_LOCK(&file->f_io_requests_lock);
|
||||||
for (p = opal_list_get_first(&file->f_io_requests);
|
while (NULL != (next = opal_list_remove_first(&file->f_io_requests))) {
|
||||||
p != opal_list_get_end(&file->f_io_requests);
|
OMPI_FREE_LIST_RETURN(&mca_io_base_requests, next);
|
||||||
p = next) {
|
|
||||||
next = opal_list_get_next(p);
|
|
||||||
OMPI_FREE_LIST_RETURN(&mca_io_base_requests, p);
|
|
||||||
}
|
}
|
||||||
OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
|
OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
|
||||||
}
|
}
|
||||||
|
@ -279,10 +279,10 @@ static int progress()
|
|||||||
return ret;
|
return ret;
|
||||||
} else if (1 == flag) {
|
} else if (1 == flag) {
|
||||||
++count;
|
++count;
|
||||||
/* mark as complete (and make sure to wake up any waiters */
|
|
||||||
ompi_request_complete((ompi_request_t*) item);
|
|
||||||
/* we're done, so remove us from the pending list */
|
/* we're done, so remove us from the pending list */
|
||||||
opal_list_remove_item(&mca_io_romio_pending_requests, item);
|
opal_list_remove_item(&mca_io_romio_pending_requests, item);
|
||||||
|
/* mark as complete (and make sure to wake up any waiters */
|
||||||
|
ompi_request_complete((ompi_request_t*) item);
|
||||||
mca_io_base_request_progress_del();
|
mca_io_base_request_progress_del();
|
||||||
/* if the request has been freed already, the user isn't
|
/* if the request has been freed already, the user isn't
|
||||||
* going to call test or wait on us, so we need to do it
|
* going to call test or wait on us, so we need to do it
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user