1
1

Fix for the double iWrite problem Edgar found with ROMIO, plus some other things I found:
  - Locking should prevent it from happening (I think), but there was a
    race condition in the component's progress function -- a callback
    could be triggered that would free the request before it had been
    removed from the outstanding requests list.
  - When pulling a request off the component free list, make sure to
    reinitialize the free_called state on the IO request.  This was
    what caused Edgar's failures.
  - In the request cleanup code, pull the request out of the per-
    component free list before returning it to the free list.  The old
    code probably would have caused asserts to fire, although it looks
    like I wrote the loops such that it would have been memory safe if
    the asserts didn't fire.  Not really sure why I did that, but
    let's try it again...

This should go to the v1.0 and v1.1 branches.

This commit was SVN r9913.
Этот коммит содержится в:
Brian Barrett 2006-05-13 02:30:40 +00:00
родитель 33e6d986be
Коммит f2a6e63d82
2 изменённых файлов: 6 добавлений и 8 удалений

Просмотреть файл

@ -135,6 +135,7 @@ int mca_io_base_request_alloc(ompi_file_t *file,
if (opal_list_get_size(&file->f_io_requests) > 0) { if (opal_list_get_size(&file->f_io_requests) > 0) {
*req = (mca_io_base_request_t*) *req = (mca_io_base_request_t*)
opal_list_remove_first(&file->f_io_requests); opal_list_remove_first(&file->f_io_requests);
(*req)->free_called = false;
} else { } else {
*req = NULL; *req = NULL;
} }
@ -221,14 +222,11 @@ void mca_io_base_request_free(ompi_file_t *file,
*/ */
void mca_io_base_request_return(ompi_file_t *file) void mca_io_base_request_return(ompi_file_t *file)
{ {
opal_list_item_t *p, *next; opal_list_item_t *next;
OPAL_THREAD_LOCK(&file->f_io_requests_lock); OPAL_THREAD_LOCK(&file->f_io_requests_lock);
for (p = opal_list_get_first(&file->f_io_requests); while (NULL != (next = opal_list_remove_first(&file->f_io_requests))) {
p != opal_list_get_end(&file->f_io_requests); OMPI_FREE_LIST_RETURN(&mca_io_base_requests, next);
p = next) {
next = opal_list_get_next(p);
OMPI_FREE_LIST_RETURN(&mca_io_base_requests, p);
} }
OPAL_THREAD_UNLOCK(&file->f_io_requests_lock); OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
} }

Просмотреть файл

@ -279,10 +279,10 @@ static int progress()
return ret; return ret;
} else if (1 == flag) { } else if (1 == flag) {
++count; ++count;
/* mark as complete (and make sure to wake up any waiters */
ompi_request_complete((ompi_request_t*) item);
/* we're done, so remove us from the pending list */ /* we're done, so remove us from the pending list */
opal_list_remove_item(&mca_io_romio_pending_requests, item); opal_list_remove_item(&mca_io_romio_pending_requests, item);
/* mark as complete (and make sure to wake up any waiters */
ompi_request_complete((ompi_request_t*) item);
mca_io_base_request_progress_del(); mca_io_base_request_progress_del();
/* if the request has been freed already, the user isn't /* if the request has been freed already, the user isn't
* going to call test or wait on us, so we need to do it * going to call test or wait on us, so we need to do it