
Performance tuning: use non-blocking operations in our array group-communication operations.

This commit is contained in:
Edgar Gabriel 2015-08-13 20:03:58 -05:00
parent 6118236f1a
commit 4bfc6ae798
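The pattern behind the change: instead of issuing one blocking recv/send per group member in rank order, each routine now posts a non-blocking irecv/isend into a request array, fills the unused slots with MPI_REQUEST_NULL, completes everything with a single ompi_request_wait_all, and then frees the array. The sketch below shows the receive side of this pattern at the public MPI level (MPI_Irecv/MPI_Waitall) rather than through the internal MCA_PML_CALL macros used in the diff; the helper name, the MPI_INT payload, and the contiguous recvcounts/displs layout are illustrative assumptions, not the OMPIO code itself.

/* Sketch: gather variable-sized contributions at the root with non-blocking
 * receives, mirroring the blocking -> non-blocking change in the diff below. */
#include <mpi.h>
#include <stdlib.h>

int gatherv_nonblocking_sketch(const int *sendbuf, int sendcount,
                               int *recvbuf, const int *recvcounts,
                               const int *displs, int root, MPI_Comm comm)
{
    int rank, size, err;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    if (rank != root) {
        /* Non-root ranks just send their contribution to the root. */
        return MPI_Send(sendbuf, sendcount, MPI_INT, root, 0, comm);
    }

    MPI_Request *reqs = malloc(size * sizeof(MPI_Request));
    if (NULL == reqs) {
        return MPI_ERR_NO_MEM;
    }

    for (int i = 0; i < size; i++) {
        if (i == rank) {
            /* Local contribution is copied directly; no request needed. */
            for (int k = 0; k < sendcount; k++) {
                recvbuf[displs[i] + k] = sendbuf[k];
            }
            reqs[i] = MPI_REQUEST_NULL;
        } else if (recvcounts[i] > 0) {
            /* Post the receive without blocking; completion is collected later. */
            err = MPI_Irecv(recvbuf + displs[i], recvcounts[i], MPI_INT,
                            i, 0, comm, &reqs[i]);
            if (MPI_SUCCESS != err) {
                free(reqs);
                return err;
            }
        } else {
            /* Empty contribution: keep the request array dense for MPI_Waitall. */
            reqs[i] = MPI_REQUEST_NULL;
        }
    }

    /* All receives progress concurrently; wait for every posted request. */
    err = MPI_Waitall(size, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
    return err;
}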


@@ -130,6 +130,7 @@ int ompi_io_ompio_gatherv_array (void *sbuf,
     int err = OMPI_SUCCESS;
     char *ptmp;
     OPAL_PTRDIFF_TYPE extent, lb;
+    ompi_request_t **reqs=NULL;
 
     rank = ompi_comm_rank (comm);
@@ -153,7 +154,10 @@ int ompi_io_ompio_gatherv_array (void *sbuf,
     if (OMPI_SUCCESS != err) {
         return OMPI_ERROR;
     }
+    reqs = (ompi_request_t **) malloc ( procs_per_group *sizeof(ompi_request_t *));
+    if ( NULL == reqs ) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
 
     for (i=0; i<procs_per_group; i++) {
         ptmp = ((char *) rbuf) + (extent * disps[i]);
@@ -168,26 +172,34 @@ int ompi_io_ompio_gatherv_array (void *sbuf,
                                            rcounts[i],
                                            rdtype);
             }
+            reqs[i] = MPI_REQUEST_NULL;
         }
         else {
             /* Only receive if there is something to receive */
             if (rcounts[i] > 0) {
-                err = MCA_PML_CALL(recv(ptmp,
+                err = MCA_PML_CALL(irecv(ptmp,
                                         rcounts[i],
                                         rdtype,
                                         procs_in_group[i],
                                         OMPIO_TAG_GATHERV,
                                         comm,
-                                        MPI_STATUS_IGNORE));
+                                        &reqs[i]));
             }
+            else {
+                reqs[i] = MPI_REQUEST_NULL;
+            }
         }
 
         if (OMPI_SUCCESS != err) {
+            free ( reqs );
             return err;
         }
     }
 
     /* All done */
+    err = ompi_request_wait_all ( procs_per_group, reqs, MPI_STATUSES_IGNORE );
+    if ( NULL != reqs ) {
+        free ( reqs );
+    }
 
     return err;
 }
@@ -207,6 +219,7 @@ int ompi_io_ompio_scatterv_array (void *sbuf,
     int err = OMPI_SUCCESS;
     char *ptmp;
     OPAL_PTRDIFF_TYPE extent, lb;
+    ompi_request_t ** reqs=NULL;
 
     rank = ompi_comm_rank (comm);
@@ -230,6 +243,10 @@ int ompi_io_ompio_scatterv_array (void *sbuf,
     if (OMPI_SUCCESS != err) {
         return OMPI_ERROR;
     }
+    reqs = ( ompi_request_t **) malloc ( procs_per_group * sizeof ( ompi_request_t *));
+    if (NULL == reqs ) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
 
     for (i=0 ; i<procs_per_group ; ++i) {
         ptmp = ((char *) sbuf) + (extent * disps[i]);
@@ -245,25 +262,34 @@ int ompi_io_ompio_scatterv_array (void *sbuf,
                                            rcount,
                                            rdtype);
             }
+            reqs[i] = MPI_REQUEST_NULL;
         }
         else {
             /* Only receive if there is something to receive */
             if (scounts[i] > 0) {
-                err = MCA_PML_CALL(send(ptmp,
+                err = MCA_PML_CALL(isend(ptmp,
                                         scounts[i],
                                         sdtype,
                                         procs_in_group[i],
                                         OMPIO_TAG_SCATTERV,
                                         MCA_PML_BASE_SEND_STANDARD,
-                                        comm));
+                                        comm,
+                                        &reqs[i]));
+            }
+            else {
+                reqs[i] = MPI_REQUEST_NULL;
             }
         }
 
         if (OMPI_SUCCESS != err) {
+            free ( reqs );
             return err;
         }
     }
 
     /* All done */
+    err = ompi_request_wait_all ( procs_per_group, reqs, MPI_STATUSES_IGNORE );
+    if ( NULL != reqs ) {
+        free ( reqs );
+    }
 
     return err;
 }
@@ -337,7 +363,8 @@ int ompi_io_ompio_gather_array (void *sbuf,
     OPAL_PTRDIFF_TYPE incr;
     OPAL_PTRDIFF_TYPE extent, lb;
     int err = OMPI_SUCCESS;
+    ompi_request_t ** reqs=NULL;
 
     rank = ompi_comm_rank (comm);
 
     /* Everyone but the writers sends data and returns. */
@@ -356,6 +383,11 @@ int ompi_io_ompio_gather_array (void *sbuf,
     opal_datatype_get_extent (&rdtype->super, &lb, &extent);
     incr = extent * rcount;
 
+    reqs = ( ompi_request_t **) malloc ( procs_per_group * sizeof ( ompi_request_t *));
+    if (NULL == reqs ) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
+
     for (i = 0, ptmp = (char *) rbuf;
          i < procs_per_group;
          ++i, ptmp += incr) {
@@ -371,15 +403,16 @@ int ompi_io_ompio_gather_array (void *sbuf,
             else {
                 err = OMPI_SUCCESS;
             }
+            reqs[i] = MPI_REQUEST_NULL;
         }
         else {
-            err = MCA_PML_CALL(recv(ptmp,
+            err = MCA_PML_CALL(irecv(ptmp,
                                     rcount,
                                     rdtype,
                                     procs_in_group[i],
                                     OMPIO_TAG_GATHER,
                                     comm,
-                                    MPI_STATUS_IGNORE));
+                                    &reqs[i]));
             /*
             for (k=0 ; k<4 ; k++)
                 printf ("RECV %p %d \n",
@@ -389,11 +422,16 @@ int ompi_io_ompio_gather_array (void *sbuf,
         }
 
         if (OMPI_SUCCESS != err) {
+            free ( reqs );
             return err;
         }
     }
 
     /* All done */
+    err = ompi_request_wait_all ( procs_per_group, reqs, MPI_STATUSES_IGNORE );
+    if ( NULL != reqs ) {
+        free ( reqs );
+    }
 
     return err;
 }
@@ -408,6 +446,7 @@ int ompi_io_ompio_bcast_array (void *buff,
 {
     int i, rank;
     int err = OMPI_SUCCESS;
+    ompi_request_t ** reqs=NULL;
 
     rank = ompi_comm_rank (comm);
@@ -424,24 +463,34 @@ int ompi_io_ompio_bcast_array (void *buff,
     }
 
     /* Writers sends data to all others. */
+    reqs = ( ompi_request_t **) malloc ( procs_per_group * sizeof ( ompi_request_t *));
+    if (NULL == reqs ) {
+        return OMPI_ERR_OUT_OF_RESOURCE;
+    }
     for (i=0 ; i<procs_per_group ; i++) {
         if (procs_in_group[i] == rank) {
+            reqs[i] = MPI_REQUEST_NULL;
            continue;
         }
 
-        err = MCA_PML_CALL(send(buff,
+        err = MCA_PML_CALL(isend(buff,
                                 count,
                                 datatype,
                                 procs_in_group[i],
                                 OMPIO_TAG_BCAST,
                                 MCA_PML_BASE_SEND_STANDARD,
-                                comm));
+                                comm,
+                                &reqs[i]));
         if (OMPI_SUCCESS != err) {
+            free ( reqs );
             return err;
         }
     }
 
+    err = ompi_request_wait_all ( procs_per_group, reqs, MPI_STATUSES_IGNORE );
+    if ( NULL != reqs ) {
+        free ( reqs );
+    }
     return err;
 }
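The scatterv and bcast paths apply the same idea on the send side: the writer posts one non-blocking send per peer and waits for the whole batch instead of blocking on each MCA_PML_CALL(send) in turn. A minimal send-side sketch, again at the public MPI level; the helper name, fixed tag, and MPI_INT payload are illustrative assumptions rather than the OMPIO internals:

/* Sketch: root posts one non-blocking send per peer, then waits for all of
 * them, mirroring the isend/wait_all change in ompi_io_ompio_bcast_array. */
#include <mpi.h>
#include <stdlib.h>

int bcast_nonblocking_sketch(int *buff, int count, int root, MPI_Comm comm)
{
    int rank, size, err = MPI_SUCCESS;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    if (rank != root) {
        /* Non-root ranks receive once from the root. */
        return MPI_Recv(buff, count, MPI_INT, root, 0, comm, MPI_STATUS_IGNORE);
    }

    MPI_Request *reqs = malloc(size * sizeof(MPI_Request));
    if (NULL == reqs) {
        return MPI_ERR_NO_MEM;
    }

    for (int i = 0; i < size; i++) {
        if (i == rank) {
            /* No self-send; keep the request slot valid for MPI_Waitall. */
            reqs[i] = MPI_REQUEST_NULL;
            continue;
        }
        err = MPI_Isend(buff, count, MPI_INT, i, 0, comm, &reqs[i]);
        if (MPI_SUCCESS != err) {
            free(reqs);
            return err;
        }
    }

    /* Sends progress concurrently instead of serializing rank by rank. */
    err = MPI_Waitall(size, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
    return err;
}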