1
1

* Properly set up communication infrastructure for libnbc.

* Prevent infinite recursion in progress loop.

Should fix the improper barrier behavior Eugene was seeing.

This commit was SVN r26758.
Этот коммит содержится в:
Brian Barrett 2012-07-06 13:59:03 +00:00
родитель e0ceabd486
Коммит 58413fa1e4
3 изменённых файлов: 17 добавлений и 20 удалений

Просмотреть файл

@ -22,6 +22,7 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
#include "opal/sys/atomic.h"
BEGIN_C_DECLS
@ -62,6 +63,7 @@ struct ompi_coll_libnbc_component_t {
ompi_free_list_t requests;
opal_list_t active_requests;
uint32_t active_comms;
opal_atomic_lock_t progress_lock;
};
typedef struct ompi_coll_libnbc_component_t ompi_coll_libnbc_component_t;
@ -95,8 +97,7 @@ struct ompi_coll_libnbc_request_t {
long row_offset;
int tag;
volatile int req_count;
/*ompi_request_t **req_array;*/
MPI_Request *req_array;
ompi_request_t **req_array;
NBC_Comminfo *comminfo;
volatile NBC_Schedule *schedule;
void *tmpbuf; /* temporary buffer e.g. used for Reduce */

Просмотреть файл

@ -97,6 +97,8 @@ libnbc_open(void)
OBJ_CONSTRUCT(&mca_coll_libnbc_component.active_requests, opal_list_t);
mca_coll_libnbc_component.active_comms = 0;
opal_atomic_init(&mca_coll_libnbc_component.progress_lock, OPAL_ATOMIC_UNLOCKED);
return OMPI_SUCCESS;
}
@ -229,18 +231,27 @@ libnbc_progress(void)
{
opal_list_item_t *item;
if (opal_atomic_trylock(&mca_coll_libnbc_component.progress_lock)) return 0;
for (item = opal_list_get_first(&mca_coll_libnbc_component.active_requests) ;
item != opal_list_get_end(&mca_coll_libnbc_component.active_requests) ;
item = opal_list_get_next(item)) {
ompi_coll_libnbc_request_t* request = (ompi_coll_libnbc_request_t*) item;
if (NBC_OK == NBC_Progress(request)) {
/* done, remove */
/* done, remove and complete */
item = opal_list_remove_item(&mca_coll_libnbc_component.active_requests,
&request->super.super.super);
request->super.req_status.MPI_ERROR = OMPI_SUCCESS;
OPAL_THREAD_LOCK(&ompi_request_lock);
ompi_request_complete(&request->super, true);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
}
item = opal_list_get_next(item);
}
opal_atomic_unlock(&mca_coll_libnbc_component.progress_lock);
return 0;
}

Просмотреть файл

@ -11,6 +11,7 @@
#include "nbc_internal.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/op/op.h"
#include "ompi/mca/pml/pml.h"
/* only used in this file */
static inline int NBC_Start_round(NBC_Handle *handle);
@ -274,11 +275,6 @@ static inline int NBC_Free(NBC_Handle* handle) {
handle->tmpbuf = NULL;
}
handle->super.req_status.MPI_ERROR = OMPI_SUCCESS;
OPAL_THREAD_LOCK(&ompi_request_lock);
ompi_request_complete(&handle->super, true);
OPAL_THREAD_UNLOCK(&ompi_request_lock);
return NBC_OK;
}
@ -298,11 +294,8 @@ int NBC_Progress(NBC_Handle *handle) {
#ifdef NBC_TIMING
Test_time -= MPI_Wtime();
#endif
#ifdef HAVE_OMPI
res = ompi_request_test_all(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE);
/* res = MPI_Testall(handle->req_count, handle->req_array, &flag, MPI_STATUSES_IGNORE); */
if(res != OMPI_SUCCESS) { printf("MPI Error in MPI_Testall() (%i)\n", res); ret=res; goto error; }
#endif
#ifdef NBC_TIMING
Test_time += MPI_Wtime();
#endif
@ -393,14 +386,10 @@ static inline int NBC_Start_round(NBC_Handle *handle) {
#ifdef NBC_TIMING
Isend_time -= MPI_Wtime();
#endif
#ifdef HAVE_OMPI
handle->req_array = (MPI_Request*)realloc((void*)handle->req_array, (handle->req_count)*sizeof(MPI_Request));
NBC_CHECK_NULL(handle->req_array);
res = MCA_PML_CALL(isend_init(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, MCA_PML_BASE_SEND_STANDARD, handle->comm, handle->req_array+handle->req_count-1));
/*printf("MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag, (unsigned long)handle->comm, res);*/
/* res = MPI_Isend(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, handle->comm, handle->req_array+handle->req_count-1); */
res = MCA_PML_CALL(isend(buf1, sendargs->count, sendargs->datatype, sendargs->dest, handle->tag, MCA_PML_BASE_SEND_STANDARD, handle->comm, handle->req_array+handle->req_count-1));
if(OMPI_SUCCESS != res) { printf("Error in MPI_Isend(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, sendargs->count, (unsigned long)sendargs->datatype, sendargs->dest, handle->tag, (unsigned long)handle->comm, res); ret=res; goto error; }
#endif
#ifdef NBC_TIMING
Isend_time += MPI_Wtime();
#endif
@ -421,14 +410,10 @@ static inline int NBC_Start_round(NBC_Handle *handle) {
#ifdef NBC_TIMING
Irecv_time -= MPI_Wtime();
#endif
#ifdef HAVE_OMPI
handle->req_array = (MPI_Request*)realloc((void*)handle->req_array, (handle->req_count)*sizeof(MPI_Request));
NBC_CHECK_NULL(handle->req_array);
res = MCA_PML_CALL(irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->comm, handle->req_array+handle->req_count-1));
/*printf("MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag, (unsigned long)handle->comm, res); */
/*res = MPI_Irecv(buf1, recvargs->count, recvargs->datatype, recvargs->source, handle->tag, handle->comm, handle->req_array+handle->req_count-1); */
if(OMPI_SUCCESS != res) { printf("Error in MPI_Irecv(%lu, %i, %lu, %i, %i, %lu) (%i)\n", (unsigned long)buf1, recvargs->count, (unsigned long)recvargs->datatype, recvargs->source, handle->tag, (unsigned long)handle->comm, res); ret=res; goto error; }
#endif
#ifdef NBC_TIMING
Irecv_time += MPI_Wtime();
#endif