
Back-end implementation of the collective MPI functions

This commit was SVN r1120.
This commit is contained in:
Vishal Sahay 2004-05-07 23:09:55 +00:00
parent 13b2738768
commit c01488ab28
4 changed files: 264 additions and 306 deletions

View file

@@ -25,15 +25,12 @@ int mca_coll_basic_allgather(void *sbuf, int scount,
                             int rcount, MPI_Datatype rdtype,
                             MPI_Comm comm)
 {
-#if 1
-    return LAM_ERR_NOT_IMPLEMENTED;
-#else
     int size;
     int err;
 
     /* Gather and broadcast. */
 
-    MPI_Comm_size(comm, &size);
+    size = lam_comm_size(comm);
     err = comm->c_coll.coll_gather_intra(sbuf, scount, sdtype, rbuf, rcount,
                                          rdtype, 0, comm);
@@ -43,5 +40,4 @@ int mca_coll_basic_allgather(void *sbuf, int scount,
     err = comm->c_coll.coll_bcast_intra(rbuf, rcount * size, rdtype,
                                         0, comm);
     return err;
-#endif
 }
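
The change above drops the LAM_ERR_NOT_IMPLEMENTED stub and switches to the internal lam_comm_size() accessor, leaving the gather-then-broadcast structure intact. As a point of reference only (not part of this commit), here is a minimal sketch of that same gather-then-broadcast pattern written against plain MPI, with MPI_Gather/MPI_Bcast standing in for the component's coll_gather_intra/coll_bcast_intra entry points:

/* Illustrative sketch only (not from this commit): allgather expressed as
   gather-to-root followed by broadcast, using standard MPI calls. */
#include "mpi.h"

static int allgather_by_gather_bcast(void *sbuf, int scount, MPI_Datatype sdtype,
                                     void *rbuf, int rcount, MPI_Datatype rdtype,
                                     MPI_Comm comm)
{
    int size;
    int err;

    MPI_Comm_size(comm, &size);

    /* Step 1: collect every rank's contribution at rank 0. */
    err = MPI_Gather(sbuf, scount, sdtype, rbuf, rcount, rdtype, 0, comm);
    if (MPI_SUCCESS != err) {
        return err;
    }

    /* Step 2: rank 0 broadcasts the assembled buffer to everyone. */
    return MPI_Bcast(rbuf, rcount * size, rdtype, 0, comm);
}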

View file

@@ -14,12 +14,13 @@
 #include "mca/coll/coll.h"
 #include "mca/coll/base/coll_tags.h"
 #include "coll_basic.h"
+#include "mca/pml/pml.h"
 
 /*
  * alltoall
  *
- *  Function:  - MPI_Alltoall for non-lamd RPI's
+ *  Function:  - MPI_Alltoall
  *  Accepts:   - same as MPI_Alltoall()
  *  Returns:   - MPI_SUCCESS or an MPI error code
  */
@@ -28,125 +29,121 @@ int mca_coll_basic_alltoall(void *sbuf, int scount,
                             int rcount, MPI_Datatype rdtype,
                             MPI_Comm comm)
 {
-#if 1
-    return LAM_ERR_NOT_IMPLEMENTED;
-#else
-    int i;
-    int rank;
-    int size;
-    int nreqs;
-    int err;
-    char *psnd;
-    char *prcv;
-    MPI_Aint sndinc;
-    MPI_Aint rcvinc;
-    MPI_Request *req;
-    MPI_Request *preq;
-    MPI_Request *qreq;
-
-    /* Initialize. */
-
-    MPI_Comm_size(comm, &size);
-    MPI_Comm_rank(comm, &rank);
-    MPI_Type_extent(sdtype, &sndinc);
-    MPI_Type_extent(rdtype, &rcvinc);
-    sndinc *= scount;
-    rcvinc *= rcount;
-
-    /* Allocate arrays of requests. */
-
-    nreqs = 2 * (size - 1);
-    if (nreqs > 0) {
-        req = malloc(nreqs * sizeof(MPI_Request));
-        if (NULL == req) {
-            free(req);
-            return ENOMEM;
-        }
-    } else {
-        req = NULL;
-    }
-
-    /* simple optimization */
-
-    psnd = ((char *) sbuf) + (rank * sndinc);
-    prcv = ((char *) rbuf) + (rank * rcvinc);
-#if 0
-    /* JMS: Need a lam_datatype_something() here that allows two
-       different datatypes */
-    err = lam_dtsndrcv(psnd, scount, sdtype,
-                       prcv, rcount, rdtype, BLKMPIALLTOALL, comm);
-    if (MPI_SUCCESS != err) {
-        if (NULL != req)
-            LAM_FREE(req);
-        lam_mkpt(comm);
-        return err;
-    }
-#endif
-
-    /* If only one process, we're done. */
-
-    if (1 == size) {
-        return MPI_SUCCESS;
-    }
-
-    /* Initiate all send/recv to/from others. */
-
-    preq = req;
-    qreq = req + size - 1;
-    prcv = (char*) rbuf;
-    psnd = (char*) sbuf;
-    for (i = (rank + 1) % size; i != rank;
-         i = (i + 1) % size, ++preq, ++qreq) {
-#if 0
-        /* JMS: Need to replace this with negative tags and and direct PML
-           calls */
-        err = MPI_Recv_init(prcv + (i * rcvinc), rcount, rdtype, i,
-                            BLKMPIALLTOALL, comm, preq);
-        if (MPI_SUCCESS != err) {
-            LAM_FREE(req);
-            return err;
-        }
-#endif
-#if 0
-        /* JMS: Need to replace this with negative tags and and direct PML
-           calls */
-        err = MPI_Send_init(psnd + (i * sndinc), scount, sdtype, i,
-                            BLKMPIALLTOALL, comm, qreq);
-        if (MPI_SUCCESS != err) {
-            LAM_FREE(req);
-            return err;
-        }
-#endif
-    }
-
-    /* Start all the requests. */
-
-    err = MPI_Startall(nreqs, req);
-    if (MPI_SUCCESS != err) {
-        free(req);
-        return err;
-    }
-
-    /* Wait for them all. */
-
-    err = MPI_Waitall(nreqs, req, MPI_STATUSES_IGNORE);
-    if (MPI_SUCCESS != err) {
-        free(req);
-        return err;
-    }
-
-    for (i = 0, preq = req; i < nreqs; ++i, ++preq) {
-        err = MPI_Request_free(preq);
-        if (MPI_SUCCESS != err) {
-            free(req);
-            return err;
-        }
-    }
-
-    /* All done */
-
-    free(req);
-    return MPI_SUCCESS;
-#endif
+    int i;
+    int rank;
+    int size;
+    int nreqs;
+    int err;
+    char *psnd;
+    char *prcv;
+    MPI_Aint lb;
+    MPI_Aint sndinc;
+    MPI_Aint rcvinc;
+
+    lam_request_t **req;
+    lam_request_t **sreq;
+    lam_request_t **rreq;
+
+    /* Initialize. */
+
+    size = lam_comm_size(comm);
+    rank = lam_comm_rank(comm);
+
+    err = lam_ddt_get_extent(sdtype, &lb, &sndinc);
+    if (0 != err) {
+        return LAM_ERROR;
+    }
+
+    err = lam_ddt_get_extent(rdtype, &lb, &rcvinc);
+    if (0 != err) {
+        return LAM_ERROR;
+    }
+
+    sndinc *= scount;
+    rcvinc *= rcount;
+
+    /* Allocate arrays of requests. */
+
+    nreqs = 2 * (size - 1);
+    if (nreqs > 0) {
+        req = malloc(nreqs * sizeof(lam_request_t *));
+        if (NULL == req) {
+            return ENOMEM;
+        }
+    } else {
+        req = NULL;
+    }
+
+    /* simple optimization */
+
+    psnd = ((char *) sbuf) + (rank * sndinc);
+    prcv = ((char *) rbuf) + (rank * rcvinc);
+
+    err = lam_ddt_sndrcv(psnd, scount, sdtype,
+                         prcv, rcount, rdtype,
+                         MCA_COLL_BASE_TAG_ALLTOALL, comm);
+    if (MPI_SUCCESS != err) {
+        if (NULL != req)
+            free(req);
+        return err;
+    }
+
+    /* If only one process, we're done. */
+
+    if (1 == size) {
+        return MPI_SUCCESS;
+    }
+
+    /* Initiate all send/recv to/from others. */
+
+    rreq = req;
+    sreq = req + size - 1;
+    prcv = (char*) rbuf;
+    psnd = (char*) sbuf;
+    for (i = (rank + 1) % size; i != rank;
+         i = (i + 1) % size, ++rreq, ++sreq) {
+        err = mca_pml.pml_irecv_init(prcv + (i * rcvinc), rcount, rdtype, i,
+                                     MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq);
+        if (MPI_SUCCESS != err) {
+            free(req);
+            return err;
+        }
+
+        err = mca_pml.pml_isend(psnd + (i * sndinc), scount, sdtype, i,
+                                MCA_COLL_BASE_TAG_ALLTOALL,
+                                MCA_PML_BASE_SEND_STANDARD, comm, sreq);
+        if (MPI_SUCCESS != err) {
+            free(req);
+            return err;
+        }
+    }
+
+    if (MPI_SUCCESS != err) {
+        free(req);
+        return err;
+    }
+
+    /* Wait for them all. */
+
+    err = mca_pml.pml_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
+    if (MPI_SUCCESS != err) {
+        free(req);
+        return err;
+    }
+
+    /* Free the reqs */
+
+    for (i = 0, rreq = req; i < nreqs; ++i, ++rreq) {
+        mca_pml.pml_free(rreq);
+    }
+
+    /* All done */
+
+    free(req);
+    return MPI_SUCCESS;
 }
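
The rewritten alltoall above handles its own block with lam_ddt_sndrcv, then sets up one receive (pml_irecv_init) and starts one send (pml_isend) per peer before waiting on the whole request array. For reference only (not part of this commit), the following sketch shows the same pairwise exchange pattern with standard MPI nonblocking calls; the buffer arithmetic assumes a contiguous datatype, and the helper name alltoall_pairwise is made up for the example:

/* Illustrative sketch only (not from this commit): pairwise alltoall with
   standard MPI nonblocking calls instead of the mca_pml entry points. */
#include <stdlib.h>
#include <string.h>
#include "mpi.h"

static int alltoall_pairwise(void *sbuf, void *rbuf, int count,
                             MPI_Datatype dtype, MPI_Comm comm)
{
    int i, rank, size, err, nreqs = 0;
    MPI_Aint lb, extent;
    MPI_Request *reqs;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);
    MPI_Type_get_extent(dtype, &lb, &extent);
    extent *= count;                 /* bytes per per-peer block */

    /* Local block: straight copy (assumes a contiguous datatype). */
    memcpy((char *) rbuf + rank * extent, (char *) sbuf + rank * extent,
           (size_t) extent);
    if (1 == size) {
        return MPI_SUCCESS;
    }

    reqs = malloc(2 * (size - 1) * sizeof(MPI_Request));
    if (NULL == reqs) {
        return MPI_ERR_NO_MEM;
    }

    /* One receive and one send per remote peer. */
    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size) {
        MPI_Irecv((char *) rbuf + i * extent, count, dtype, i, 0, comm,
                  &reqs[nreqs++]);
        MPI_Isend((char *) sbuf + i * extent, count, dtype, i, 0, comm,
                  &reqs[nreqs++]);
    }

    err = MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
    return err;
}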

View file

@@ -7,9 +7,12 @@
 #include "constants.h"
 #include "mpi.h"
+#include "datatype/datatype.h"
 #include "mca/coll/coll.h"
 #include "mca/coll/base/coll_tags.h"
 #include "coll_basic.h"
+#include "mca/pml/pml.h"
+#include "util/hibit.h"
 
 /*
@@ -20,84 +23,63 @@
  *  Returns:  - MPI_SUCCESS or error code
  */
 int mca_coll_basic_bcast_lin(void *buff, int count,
                              MPI_Datatype datatype, int root,
                              MPI_Comm comm)
 {
-#if 1
-    return LAM_ERR_NOT_IMPLEMENTED;
-#else
-    int i;
-    int size;
-    int rank;
-    int err;
-    MPI_Request *preq;
-    /* JMS: Need to define this somewhere */
-#define LAM_COLLMAXLIN 4
-    MPI_Request reqs[LAM_COLLMAXLIN];
-
-    MPI_Comm_size(comm, &size);
-    MPI_Comm_rank(comm, &rank);
-
-    /* Non-root receive the data. */
-
-    if (rank != root) {
-#if 0
-        /* JMS: Need to replace this with negative tags and and direct PML
-           calls */
-        return MPI_Recv(buff, count, datatype, root,
-                        BLKMPIBCAST, comm, MPI_STATUS_IGNORE);
-#endif
-    }
-
-    /* Root sends data to all others. */
-
-    for (i = 0, preq = reqs; i < size; ++i) {
-        if (i == rank)
-            continue;
-#if 0
-        /* JMS: Need to replace this with negative tags and and direct PML
-           calls */
-        err = MPI_Send_init(buff, count, datatype, i, BLKMPIBCAST,
-                            comm, preq++);
-        if (MPI_SUCCESS != err) {
-            return err;
-        }
-#endif
-    }
-
-    /* Start and wait on all requests. */
-
-    err = MPI_Startall(size - 1, reqs);
-    if (MPI_SUCCESS != err) {
-        return err;
-    }
-
-    err = MPI_Waitall(size - 1, reqs, MPI_STATUSES_IGNORE);
-    if (MPI_SUCCESS != err) {
-        return err;
-    }
-
-    /* Free the requests. */
-
-    for (i = 0, preq = reqs; i < size; ++i) {
-        if (i == rank)
-            continue;
-
-        err = MPI_Request_free(preq);
-        if (MPI_SUCCESS != err)
-            return err;
-        ++preq;
-    }
-
-    /* All done */
-
-    return MPI_SUCCESS;
-#endif
+    int i;
+    int size;
+    int rank;
+    int err;
+    lam_request_t **preq;
+    lam_request_t **reqs = comm->bcast_lin_reqs;
+
+    size = lam_comm_size(comm);
+    rank = lam_comm_rank(comm);
+
+    /* Non-root receive the data. */
+
+    if (rank != root) {
+        return mca_pml.pml_recv(buff, count, datatype, root,
+                                MCA_COLL_BASE_TAG_BCAST, comm,
+                                MPI_STATUS_IGNORE);
+    }
+
+    /* Root sends data to all others. */
+
+    /* VPS: as per Tim's suggestion there is no advantage of having
+       isend_init/start over normal isend. So just trying a normal isend */
+
+    for (i = 0, preq = reqs; i < size; ++i) {
+        if (i == rank)
+            continue;
+
+        err = mca_pml.pml_isend(buff, count, datatype, i,
+                                MCA_COLL_BASE_TAG_BCAST,
+                                MCA_PML_BASE_SEND_STANDARD,
+                                comm, preq++);
+        if (MPI_SUCCESS != err) {
+            return err;
+        }
+    }
+
+    /* Free the requests. */
+
+    for (i = 0, preq = reqs; i < size; ++i) {
+        if (i == rank)
+            continue;
+
+        err = mca_pml.pml_free(preq);
+        ++preq;
+    }
+
+    /* All done */
+
+    return MPI_SUCCESS;
 }
 
 /*
  * bcast_log
  *
@@ -106,97 +88,79 @@ int mca_coll_basic_bcast_lin(void *buff, int count,
  *  Returns:  - MPI_SUCCESS or error code
  */
 int mca_coll_basic_bcast_log(void *buff, int count,
                              MPI_Datatype datatype, int root,
                              MPI_Comm comm)
 {
-#if 1
-    return LAM_ERR_NOT_IMPLEMENTED;
-#else
-    int i;
-    int size;
-    int rank;
-    int vrank;
-    int peer;
-    int dim;
-    int hibit;
-    int mask;
-    int err;
-    int nreqs;
-    MPI_Request *preq;
-    /* JMS: Need to define this somewhere */
-#define LAM_COLLMAXDIM 64
-    MPI_Request reqs[LAM_COLLMAXDIM];
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &size);
-    vrank = (rank + size - root) % size;
-#if 0
-    /* JMS Need to cache this somewhere */
-    dim = comm->c_cube_dim;
-    hibit = lam_hibit(vrank, dim);
-#endif
-    --dim;
-
-    /* Receive data from parent in the tree. */
-
-    if (vrank > 0) {
-        peer = ((vrank & ~(1 << hibit)) + root) % size;
-#if 0
-        /* JMS: Need to replace this with negative tags and and direct PML
-           calls */
-        err = MPI_Recv(buff, count, datatype, peer,
-                       BLKMPIBCAST, comm, MPI_STATUS_IGNORE);
-        if (MPI_SUCCESS != err) {
-            return err;
-        }
-#endif
-    }
-
-    /* Send data to the children. */
-
-    preq = reqs;
-    nreqs = 0;
-    for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
-        peer = vrank | mask;
-        if (peer < size) {
-            peer = (peer + root) % size;
-            ++nreqs;
-#if 0
-            /* JMS: Need to replace this with negative tags and and direct PML
-               calls */
-            err = MPI_Send_init(buff, count, datatype, peer, BLKMPIBCAST,
-                                comm, preq++);
-            if (MPI_SUCCESS != err) {
-                return err;
-            }
-#endif
-        }
-    }
-
-    /* Start and wait on all requests. */
-
-    if (nreqs > 0) {
-        err = MPI_Startall(nreqs, reqs);
-        if (MPI_SUCCESS != err) {
-            return err;
-        }
-
-        err = MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
-        if (MPI_SUCCESS != err) {
-            return err;
-        }
-
-        for (i = 0, preq = reqs; i < nreqs; ++i, ++preq) {
-            err = MPI_Request_free(preq);
-            if (MPI_SUCCESS != err) {
-                return err;
-            }
-        }
-    }
-
-    /* All done */
-
-    return MPI_SUCCESS;
-#endif
+    int i;
+    int size;
+    int rank;
+    int vrank;
+    int peer;
+    int dim;
+    int hibit;
+    int mask;
+    int err;
+    int nreqs;
+    lam_request_t **preq;
+    lam_request_t **reqs = comm->bcast_log_reqs;
+
+    size = lam_comm_size(comm);
+    rank = lam_comm_rank(comm);
+    vrank = (rank + size - root) % size;
+
+    dim = comm->c_cube_dim;
+    hibit = lam_hibit(vrank, dim);
+    --dim;
+
+    /* Receive data from parent in the tree. */
+
+    if (vrank > 0) {
+        peer = ((vrank & ~(1 << hibit)) + root) % size;
+        err = mca_pml.pml_recv(buff, count, datatype, peer,
+                               MCA_COLL_BASE_TAG_BCAST,
+                               comm, MPI_STATUS_IGNORE);
+        if (MPI_SUCCESS != err) {
+            return err;
+        }
+    }
+
+    /* Send data to the children. */
+
+    preq = reqs;
+    nreqs = 0;
+    for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
+        peer = vrank | mask;
+        if (peer < size) {
+            peer = (peer + root) % size;
+            ++nreqs;
+
+            err = mca_pml.pml_isend(buff, count, datatype, peer,
+                                    MCA_COLL_BASE_TAG_BCAST,
+                                    MCA_PML_BASE_SEND_STANDARD,
+                                    comm, preq++);
+            if (MPI_SUCCESS != err) {
+                return err;
+            }
+        }
+    }
+
+    /* Start and wait on all requests. */
+
+    if (nreqs > 0) {
+        err = mca_pml.pml_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE);
+        if (MPI_SUCCESS != err) {
+            return err;
+        }
+
+        for (i = 0, preq = reqs; i < nreqs; ++i, ++preq) {
+            mca_pml.pml_free(preq);
+        }
+    }
+
+    /* All done */
+
+    return MPI_SUCCESS;
 }
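
The logarithmic broadcast above rotates ranks so the root becomes virtual rank 0, receives from the parent found by clearing the highest set bit of the virtual rank (lam_hibit), and forwards to the children found by setting higher bits. For reference only (not part of this commit), here is a sketch of that hypercube pattern using plain MPI_Send/MPI_Recv; the helper name bcast_hypercube and the open-coded highest-bit loop are illustrative stand-ins for lam_hibit and the cached c_cube_dim:

/* Illustrative sketch only (not from this commit): hypercube/binomial
   broadcast with plain MPI point-to-point calls. */
#include "mpi.h"

static int bcast_hypercube(void *buff, int count, MPI_Datatype datatype,
                           int root, MPI_Comm comm)
{
    int rank, size, vrank, hibit, mask, peer, err;

    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);
    vrank = (rank + size - root) % size;

    /* Position of the highest set bit of vrank (-1 when vrank == 0). */
    for (hibit = -1, mask = vrank; mask != 0; mask >>= 1) {
        ++hibit;
    }

    /* Receive from the parent in the virtual hypercube. */
    if (vrank > 0) {
        peer = ((vrank & ~(1 << hibit)) + root) % size;
        err = MPI_Recv(buff, count, datatype, peer, 0, comm,
                       MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    /* Forward to children: set each bit above hibit while still in range. */
    for (mask = 1 << (hibit + 1); vrank + mask < size; mask <<= 1) {
        peer = ((vrank | mask) + root) % size;
        err = MPI_Send(buff, count, datatype, peer, 0, comm);
        if (MPI_SUCCESS != err) {
            return err;
        }
    }

    return MPI_SUCCESS;
}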

View file

@@ -6,11 +6,12 @@
 #include "coll_basic.h"
 
 #include "constants.h"
+#include "coll_basic.h"
 #include "mpi.h"
+#include "datatype/datatype.h"
 #include "mca/coll/coll.h"
 #include "mca/coll/base/coll_tags.h"
-#include "coll_basic.h"
+#include "mca/pml/pml.h"
 
 /*
  * gather
@@ -23,53 +24,53 @@ int mca_coll_basic_gather(void *sbuf, int scount, MPI_Datatype sdtype,
                           void *rbuf, int rcount, MPI_Datatype rdtype,
                           int root, MPI_Comm comm)
 {
-#if 1
-    return LAM_ERR_NOT_IMPLEMENTED;
-#else
-    int i;
-    int err;
-    int rank;
-    int size;
-    char *ptmp;
-    MPI_Aint incr;
-    MPI_Aint extent;
-
-    /* JMS: Need to replace lots things in this file: lam_dt* stuff with
-       lam_datatype_*() functions.  Also need to replace lots of
-       MPI_Send/MPI_Recv with negative tags and PML entry points. */
-
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &size);
-
-    /* Everyone but root sends data and returns. */
-
-    if (rank != root) {
-        err = MPI_Send(sbuf, scount, sdtype, root, BLKMPIGATHER, comm);
-        return err;
-    }
-
-    /* I am the root, loop receiving the data. */
-
-    MPI_Type_extent(rdtype, &extent);
-    incr = extent * rcount;
-    for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
-        /* simple optimization */
-        if (i == rank) {
-            err = lam_dtsndrcv(sbuf, scount, sdtype, ptmp,
-                               rcount, rdtype, BLKMPIGATHER, comm);
-        } else {
-            err = MPI_Recv(ptmp, rcount, rdtype, i,
-                           BLKMPIGATHER, comm, MPI_STATUS_IGNORE);
-        }
-        if (MPI_SUCCESS != err) {
-            return err;
-        }
-    }
-
-    /* All done */
-
-    return MPI_SUCCESS;
-#endif
+    int i;
+    int err;
+    int rank;
+    int size;
+    char *ptmp;
+    MPI_Aint incr;
+    MPI_Aint extent;
+    MPI_Aint lb;
+
+    size = lam_comm_size(comm);
+    rank = lam_comm_rank(comm);
+
+    /* Everyone but root sends data and returns. */
+
+    if (rank != root) {
+        err = mca_pml.pml_send(sbuf, scount, sdtype, root,
+                               MCA_COLL_BASE_TAG_GATHER,
+                               MCA_PML_BASE_SEND_STANDARD, comm);
+        return err;
+    }
+
+    /* I am the root, loop receiving the data. */
+
+    err = lam_ddt_get_extent(rdtype, &lb, &extent);
+    if (0 != err)
+        return LAM_ERROR;
+
+    incr = extent * rcount;
+    for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
+        /* simple optimization */
+        if (i == rank) {
+            err = lam_ddt_sndrcv(sbuf, scount, sdtype, ptmp,
+                                 rcount, rdtype,
+                                 MCA_COLL_BASE_TAG_GATHER, comm);
+        } else {
+            err = mca_pml.pml_recv(ptmp, rcount, rdtype, i,
+                                   MCA_COLL_BASE_TAG_GATHER,
+                                   comm, MPI_STATUS_IGNORE);
+        }
+        if (MPI_SUCCESS != err) {
+            return err;
+        }
+    }
+
+    /* All done */
+
+    return MPI_SUCCESS;
 }
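
The gather above is the straightforward linear algorithm: every non-root rank sends once to the root, and the root loops over ranks, receiving each block into its slot of the receive buffer and handling its own block with a local copy (lam_ddt_sndrcv). For reference only (not part of this commit), an equivalent sketch in plain MPI, where the local copy is a memcpy and therefore assumes contiguous, equally sized send/receive types:

/* Illustrative sketch only (not from this commit): linear gather with
   standard MPI calls in place of the mca_pml entry points. */
#include <string.h>
#include "mpi.h"

static int gather_linear(void *sbuf, int scount, MPI_Datatype sdtype,
                         void *rbuf, int rcount, MPI_Datatype rdtype,
                         int root, MPI_Comm comm)
{
    int i, rank, size, err;
    MPI_Aint lb, extent;
    char *ptmp;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    /* Everyone but the root just sends its buffer and is done. */
    if (rank != root) {
        return MPI_Send(sbuf, scount, sdtype, root, 0, comm);
    }

    /* Root: receive from each rank into consecutive slots of rbuf. */
    MPI_Type_get_extent(rdtype, &lb, &extent);
    for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += extent * rcount) {
        if (i == rank) {
            /* Local contribution: plain copy (contiguous-type assumption). */
            memcpy(ptmp, sbuf, (size_t) (extent * rcount));
            err = MPI_SUCCESS;
        } else {
            err = MPI_Recv(ptmp, rcount, rdtype, i, 0, comm,
                           MPI_STATUS_IGNORE);
        }
        if (MPI_SUCCESS != err) {
            return err;
        }
    }
    return MPI_SUCCESS;
}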