1
1

Consistent return from all progress functions.

This fix ensures that all progress functions return the number of
completed events.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
This commit is contained in:
George Bosilca 2020-01-08 16:07:36 -05:00 committed by Joseph Schuchart
parent 2c97187ee0
commit 72501f8f9c
7 changed files with 30 additions and 22 deletions

View file

@@ -100,6 +100,7 @@ static int ompi_comm_request_progress (void)
{ {
ompi_comm_request_t *request, *next; ompi_comm_request_t *request, *next;
static opal_atomic_int32_t progressing = 0; static opal_atomic_int32_t progressing = 0;
int completed = 0;
/* don't allow re-entry */ /* don't allow re-entry */
if (opal_atomic_swap_32 (&progressing, 1)) { if (opal_atomic_swap_32 (&progressing, 1)) {
@@ -126,6 +127,7 @@ static int ompi_comm_request_progress (void)
} }
ompi_request_free (&subreq); ompi_request_free (&subreq);
request_item->subreq_count--; request_item->subreq_count--;
completed++;
} else { } else {
item_complete = false; item_complete = false;
break; break;
@@ -163,7 +165,7 @@ static int ompi_comm_request_progress (void)
opal_mutex_unlock (&ompi_comm_request_mutex); opal_mutex_unlock (&ompi_comm_request_mutex);
progressing = 0; progressing = 0;
return 1; return completed;
} }
void ompi_comm_request_start (ompi_comm_request_t *request) void ompi_comm_request_start (ompi_comm_request_t *request)

View file

@@ -469,5 +469,5 @@ int ompi_mtl_psm2_progress( void ) {
opal_show_help("help-mtl-psm2.txt", opal_show_help("help-mtl-psm2.txt",
"error polling network", true, "error polling network", true,
psm2_error_get_string(err)); psm2_error_get_string(err));
return 1; return OMPI_ERROR;
} }

View file

@@ -202,14 +202,13 @@ int ompi_grequest_invoke_query(ompi_request_t *request,
int rc = OMPI_SUCCESS; int rc = OMPI_SUCCESS;
ompi_grequest_t *g = (ompi_grequest_t*) request; ompi_grequest_t *g = (ompi_grequest_t*) request;
/* MPI-2:8.2 does not say what to do with the return value from /* MPI-3 mandates that the return value from the query function
the query function (i.e., the int return value from the C * (i.e., the int return value from the C function or the ierr
function or the ierr argument from the Fortran function). * argument from the Fortran function) must be returned to the
Making the command decision here to ignore it. If the handler * user. Thus, if the return of the query function is not MPI_SUCCESS
wants to pass an error back, it should set it in the MPI_ERROR * we will update the MPI_ERROR field. Otherwise, the MPI_ERROR
field in the status (which is always kept, regardless if the * field is untouched (or left to the discretion of the query function).
top-level function was invoked with MPI_STATUS[ES]_IGNORE or */
not). */
if (NULL != g->greq_query.c_query) { if (NULL != g->greq_query.c_query) {
if (g->greq_funcs_are_c) { if (g->greq_funcs_are_c) {
rc = g->greq_query.c_query(g->greq_state, status); rc = g->greq_query.c_query(g->greq_state, status);
@@ -221,7 +220,9 @@ int ompi_grequest_invoke_query(ompi_request_t *request,
rc = OMPI_FINT_2_INT(ierr); rc = OMPI_FINT_2_INT(ierr);
} }
} }
if( MPI_SUCCESS != rc ) {
status->MPI_ERROR = rc;
}
return rc; return rc;
} }

View file

@@ -44,13 +44,11 @@ static int grequestx_progress(void) {
MPI_Status status; MPI_Status status;
OPAL_THREAD_UNLOCK(&lock); OPAL_THREAD_UNLOCK(&lock);
request->greq_poll.c_poll(request->greq_state, &status); request->greq_poll.c_poll(request->greq_state, &status);
if (REQUEST_COMPLETE(&request->greq_base)) {
OPAL_THREAD_LOCK(&lock); OPAL_THREAD_LOCK(&lock);
if (REQUEST_COMPLETE(&request->greq_base)) {
opal_list_remove_item(&requests, &request->greq_base.super.super); opal_list_remove_item(&requests, &request->greq_base.super.super);
OPAL_THREAD_UNLOCK(&lock);
completed++; completed++;
} }
OPAL_THREAD_LOCK(&lock);
} }
in_progress = false; in_progress = false;
} }

View file

@@ -279,10 +279,11 @@ void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool)
return; return;
} }
OPAL_DECLSPEC void OPAL_DECLSPEC int
opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool) opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool)
{ {
_winfo_list_item_t *item = NULL, *next = NULL; _winfo_list_item_t *item = NULL, *next = NULL;
int completed = 0, progressed = 0;
/* Go over all active workers and progress them /* Go over all active workers and progress them
* TODO: may want to have some partitioning to progress only part of * TODO: may want to have some partitioning to progress only part of
@@ -297,14 +298,19 @@ opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool)
opal_list_remove_item(&wpool->active_workers, &item->super); opal_list_remove_item(&wpool->active_workers, &item->super);
_winfo_reset(winfo); _winfo_reset(winfo);
opal_list_append(&wpool->idle_workers, &item->super); opal_list_append(&wpool->idle_workers, &item->super);
completed++;
} else { } else {
/* Progress worker until there are existing events */ /* Progress worker until there are existing events */
while(ucp_worker_progress(winfo->worker)); do {
progressed = ucp_worker_progress(winfo->worker);
completed += progressed;
} while (progressed);
} }
opal_mutex_unlock(&winfo->mutex); opal_mutex_unlock(&winfo->mutex);
} }
opal_mutex_unlock(&wpool->mutex); opal_mutex_unlock(&wpool->mutex);
} }
return completed;
} }
static int static int

View file

@@ -165,7 +165,7 @@ OPAL_DECLSPEC void opal_common_ucx_wpool_free(opal_common_ucx_wpool_t *wpool);
OPAL_DECLSPEC int opal_common_ucx_wpool_init(opal_common_ucx_wpool_t *wpool, OPAL_DECLSPEC int opal_common_ucx_wpool_init(opal_common_ucx_wpool_t *wpool,
int proc_world_size, bool enable_mt); int proc_world_size, bool enable_mt);
OPAL_DECLSPEC void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool); OPAL_DECLSPEC void opal_common_ucx_wpool_finalize(opal_common_ucx_wpool_t *wpool);
OPAL_DECLSPEC void opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool); OPAL_DECLSPEC int opal_common_ucx_wpool_progress(opal_common_ucx_wpool_t *wpool);
/* Manage Communication context */ /* Manage Communication context */
OPAL_DECLSPEC int opal_common_ucx_wpctx_create(opal_common_ucx_wpool_t *wpool, int comm_size, OPAL_DECLSPEC int opal_common_ucx_wpctx_create(opal_common_ucx_wpool_t *wpool, int comm_size,

View file

@@ -187,20 +187,21 @@ static int mca_spml_ucx_component_register(void)
int spml_ucx_ctx_progress(void) int spml_ucx_ctx_progress(void)
{ {
int i; int i, completed = 0;
for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) { for (i = 0; i < mca_spml_ucx.active_array.ctxs_count; i++) {
ucp_worker_progress(mca_spml_ucx.active_array.ctxs[i]->ucp_worker[0]); completed += ucp_worker_progress(mca_spml_ucx.active_array.ctxs[i]->ucp_worker[0]);
} }
return 1; return completed;
} }
int spml_ucx_default_progress(void) int spml_ucx_default_progress(void)
{ {
unsigned int i=0; unsigned int i=0;
int completed = 0;
for (i = 0; i < mca_spml_ucx.ucp_workers; i++) { for (i = 0; i < mca_spml_ucx.ucp_workers; i++) {
ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker[i]); completed += ucp_worker_progress(mca_spml_ucx_ctx_default.ucp_worker[i]);
} }
return 1; return completed;
} }
int spml_ucx_progress_aux_ctx(void) int spml_ucx_progress_aux_ctx(void)