1
1

Correct the bcast problem where we always did a bcast with a segsize of 0.

Activate the reduce decision function.
Other small updates (mostly TABs to spaces).

This commit was SVN r12161.
Этот коммит содержится в:
George Bosilca 2006-10-18 02:00:46 +00:00
родитель 50649dd6a9
Коммит be27ee6fa0
17 изменённых файлов: 1296 добавлений и 1360 удалений

Просмотреть файл

@ -42,9 +42,9 @@
*/ */
int int
ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count, ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_op_t *op,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int err; int err;
int rank; int rank;
@ -97,16 +97,15 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count
*/ */
int int
ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count, ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_op_t *op,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int err; int err;
int rank; int rank;
rank = ompi_comm_rank(comm); rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_basic_linear rank %d", rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_basic_linear rank %d", rank));
/* Reduce to 0 and broadcast. */ /* Reduce to 0 and broadcast. */
@ -144,63 +143,63 @@ int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorith
int rc; int rc;
int max_alg = 2; int max_alg = 2;
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = max_alg; ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = max_alg;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_count", "allreduce_algorithm_count",
"Number of allreduce algorithms available", "Number of allreduce algorithms available",
false, true, max_alg, NULL); false, true, max_alg, NULL);
mca_param_indices->algorithm_param_index = mca_base_param_reg_int( mca_param_indices->algorithm_param_index = mca_base_param_reg_int(
&mca_coll_tuned_component.super.collm_version, &mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm", "allreduce_algorithm",
"Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast)", "Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast)",
false, false, 0, NULL); false, false, 0, NULL);
mca_param_indices->segsize_param_index = mca_base_param_reg_int( mca_param_indices->segsize_param_index = mca_base_param_reg_int(
&mca_coll_tuned_component.super.collm_version, &mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_segmentsize", "allreduce_algorithm_segmentsize",
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", "Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL); false, false, 0, NULL);
mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int( mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(
&mca_coll_tuned_component.super.collm_version, &mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_tree_fanout", "allreduce_algorithm_tree_fanout",
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", "Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false, ompi_coll_tuned_init_tree_fanout, /* get system wide default */ false, false, ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL); NULL);
mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int( mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(
&mca_coll_tuned_component.super.collm_version, &mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_chain_fanout", "allreduce_algorithm_chain_fanout",
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", "Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false, false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */ ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL); NULL);
return (MPI_SUCCESS); return (MPI_SUCCESS);
} }
int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count, int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_op_t *op,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d",
comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm)); comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm));
switch (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) { switch (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) {
case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm); case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm); case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm);
case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm); case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm);
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm, comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE])); ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */
@ -208,25 +207,23 @@ switch (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) {
int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count, int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_op_t *op,
struct ompi_communicator_t *comm, struct ompi_communicator_t *comm,
int algorithm, int faninout, int segsize) int algorithm, int faninout, int segsize)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d",
algorithm, faninout, segsize)); algorithm, faninout, segsize));
switch (algorithm) { switch (algorithm) {
case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm); case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm); case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm);
case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm); case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm);
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE])); algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */
} }

Просмотреть файл

@ -31,10 +31,10 @@
#include "coll_tuned_util.h" #include "coll_tuned_util.h"
int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, struct ompi_datatype_t *sdtype,
void* rbuf, int rcount, void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int line = -1, err = 0; int line = -1, err = 0;
int rank, size, step; int rank, size, step;
@ -54,37 +54,37 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Perform pairwise exchange - starting from 1 so the local copy is last */ /* Perform pairwise exchange - starting from 1 so the local copy is last */
for (step = 1; step < size+1; step++) { for (step = 1; step < size+1; step++) {
/* who do we talk to in this step? */ /* who do we talk to in this step? */
sendto = (rank+step)%size; sendto = (rank+step)%size;
recvfrom = (rank+size-step)%size; recvfrom = (rank+size-step)%size;
/* where from are we sending and where from are we receiving actual data ? */ /* where from are we sending and where from are we receiving actual data ? */
tmpsend = (char*)sbuf+sendto*sext*scount; tmpsend = (char*)sbuf+sendto*sext*scount;
tmprecv = (char*)rbuf+recvfrom*rext*rcount; tmprecv = (char*)rbuf+recvfrom*rext*rcount;
/* send and receive */ /* send and receive */
err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALL, err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALL,
tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL, tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL,
comm, MPI_STATUS_IGNORE, rank); comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
} }
return MPI_SUCCESS; return MPI_SUCCESS;
err_hndl: err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err; return err;
} }
int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, struct ompi_datatype_t *sdtype,
void* rbuf, int rcount, void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int i, k, line = -1; int i, k, line = -1;
int rank, size; int rank, size;
@ -145,107 +145,107 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) ((size-rank)*scount), err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) ((size-rank)*scount),
tmpbuf, ((char*)sbuf)+rank*scount*sext); tmpbuf, ((char*)sbuf)+rank*scount*sext);
if (err<0) { if (err<0) {
line = __LINE__; err = -1; goto err_hndl; line = __LINE__; err = -1; goto err_hndl;
} }
if (rank != 0) { if (rank != 0) {
err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) (rank*scount), err = ompi_ddt_copy_content_same_ddt (sdtype, (int32_t) (rank*scount),
tmpbuf+(size-rank)*scount*sext, (char*)sbuf); tmpbuf+(size-rank)*scount*sext, (char*)sbuf);
if (err<0) {
line = __LINE__; err = -1; goto err_hndl;
}
}
/* perform communication step */
for (distance = 1; distance < size; distance<<=1) {
/* send data to "sendto" */
sendto = (rank+distance)%size;
recvfrom = (rank-distance+size)%size;
packsize = 0;
k = 0;
/* create indexed datatype */
for (i = 1; i < size; i++) {
if ((i&distance) == distance) {
displs[k] = i*scount; blen[k] = scount;
k++;
}
}
/* Set indexes and displacements */
err = MPI_Type_indexed(k, blen, displs, sdtype, &iddt);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Commit the new datatype */
err = MPI_Type_commit(&iddt);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* have the new distribution ddt, pack and exchange data */
err = MPI_Pack(tmpbuf, 1, iddt, packbuf, maxpacksize, &packsize, comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Sendreceive */
err = ompi_coll_tuned_sendrecv ( packbuf, packsize, MPI_PACKED, sendto,
MCA_COLL_BASE_TAG_ALLTOALL,
rbuf, packsize, MPI_PACKED, recvfrom,
MCA_COLL_BASE_TAG_ALLTOALL,
comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Unpack data from rbuf to tmpbuf */
position = 0;
err = MPI_Unpack(rbuf, packsize, &position,
tmpbuf, 1, iddt, comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* free ddt */
err = MPI_Type_free(&iddt);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
} /* end of for (distance = 1... */
/* Step 3 - local rotation - */
for (i = 0; i < size; i++) {
err = ompi_ddt_copy_content_same_ddt (rdtype, (int32_t) rcount,
((char*)rbuf)+(((rank-i+size)%size)*rcount*rext),
tmpbuf+i*rcount*rext);
if (err<0) { if (err<0) {
line = __LINE__; err = -1; goto err_hndl; line = __LINE__; err = -1; goto err_hndl;
} }
} }
/* perform communication step */
for (distance = 1; distance < size; distance<<=1) {
/* send data to "sendto" */
sendto = (rank+distance)%size;
recvfrom = (rank-distance+size)%size;
packsize = 0;
k = 0;
/* create indexed datatype */
for (i = 1; i < size; i++) {
if ((i&distance) == distance) {
displs[k] = i*scount; blen[k] = scount;
k++;
}
}
/* Set indexes and displacements */
err = MPI_Type_indexed(k, blen, displs, sdtype, &iddt);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Commit the new datatype */
err = MPI_Type_commit(&iddt);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* have the new distribution ddt, pack and exchange data */
err = MPI_Pack(tmpbuf, 1, iddt, packbuf, maxpacksize, &packsize, comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Sendreceive */
err = ompi_coll_tuned_sendrecv ( packbuf, packsize, MPI_PACKED, sendto,
MCA_COLL_BASE_TAG_ALLTOALL,
rbuf, packsize, MPI_PACKED, recvfrom,
MCA_COLL_BASE_TAG_ALLTOALL,
comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Unpack data from rbuf to tmpbuf */
position = 0;
err = MPI_Unpack(rbuf, packsize, &position,
tmpbuf, 1, iddt, comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* free ddt */
err = MPI_Type_free(&iddt);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
} /* end of for (distance = 1... */
/* Step 3 - local rotation - */
for (i = 0; i < size; i++) {
err = ompi_ddt_copy_content_same_ddt (rdtype, (int32_t) rcount,
((char*)rbuf)+(((rank-i+size)%size)*rcount*rext),
tmpbuf+i*rcount*rext);
if (err<0) {
line = __LINE__; err = -1; goto err_hndl;
}
}
if (err<0) { if (err<0) {
line = __LINE__; err = -1; goto err_hndl; line = __LINE__; err = -1; goto err_hndl;
} }
/* Step 4 - clean up */ /* Step 4 - clean up */
if (tmpbuf != NULL) free(tmpbuf); if (tmpbuf != NULL) free(tmpbuf);
if (packbuf != NULL) free(packbuf); if (packbuf != NULL) free(packbuf);
if (weallocated) { if (weallocated) {
if (displs != NULL) free(displs); if (displs != NULL) free(displs);
if (blen != NULL) free(blen); if (blen != NULL) free(blen);
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
err_hndl: err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
if (tmpbuf != NULL) free(tmpbuf); if (tmpbuf != NULL) free(tmpbuf);
if (packbuf != NULL) free(packbuf); if (packbuf != NULL) free(packbuf);
if (weallocated) { if (weallocated) {
if (displs != NULL) free(displs); if (displs != NULL) free(displs);
if (blen != NULL) free(blen); if (blen != NULL) free(blen);
} }
return err; return err;
} }
int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, struct ompi_datatype_t *sdtype,
void* rbuf, int rcount, void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int line = -1, err = 0; int line = -1, err = 0;
int rank; int rank;
@ -273,8 +273,8 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
/* send and receive */ /* send and receive */
err = ompi_coll_tuned_sendrecv ( tmpsend, scount, sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALL, err = ompi_coll_tuned_sendrecv ( tmpsend, scount, sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALL,
tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL, tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL,
comm, MPI_STATUS_IGNORE, rank ); comm, MPI_STATUS_IGNORE, rank );
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* ddt sendrecv your own data */ /* ddt sendrecv your own data */
@ -287,7 +287,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
err_hndl: err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err; return err;
} }
@ -311,10 +311,10 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, struct ompi_datatype_t *sdtype,
void* rbuf, int rcount, void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int i; int i;
int rank; int rank;
@ -443,51 +443,51 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm
ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = max_alg; ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = max_alg;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_count", "alltoall_algorithm_count",
"Number of alltoall algorithms available", "Number of alltoall algorithms available",
false, true, max_alg, NULL); false, true, max_alg, NULL);
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm", "alltoall_algorithm",
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: two proc only.", "Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: two proc only.",
false, false, 0, NULL); false, false, 0, NULL);
mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_segmentsize", "alltoall_algorithm_segmentsize",
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", "Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL); false, false, 0, NULL);
mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_tree_fanout", "alltoall_algorithm_tree_fanout",
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", "Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false, false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */ ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL); NULL);
mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_chain_fanout", "alltoall_algorithm_chain_fanout",
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", "Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false, false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */ ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL); NULL);
return (MPI_SUCCESS); return (MPI_SUCCESS);
} }
int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, struct ompi_datatype_t *sdtype,
void* rbuf, int rcount, void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d",
comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm)); comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm));
switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) { switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) {
case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
@ -495,7 +495,7 @@ switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) {
case (4): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (4): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL])); comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */
@ -503,16 +503,16 @@ switch (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) {
int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, struct ompi_datatype_t *sdtype,
void* rbuf, int rcount, void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm, struct ompi_communicator_t *comm,
int algorithm, int faninout, int segsize) int algorithm, int faninout, int segsize)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize)); algorithm, faninout, segsize));
switch (algorithm) { switch (algorithm) {
case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
@ -520,7 +520,7 @@ switch (algorithm) {
case (4): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); case (4): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL])); algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */

Просмотреть файл

@ -65,7 +65,7 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm)
if (rank > 0) { /* receive message from the left */ if (rank > 0) { /* receive message from the left */
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
} }
@ -77,14 +77,14 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm)
/* root needs to receive from the last node */ /* root needs to receive from the last node */
if (rank == 0) { if (rank == 0) {
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
} }
/* Allow nodes to exit */ /* Allow nodes to exit */
if (rank > 0) { /* post Receive from left */ if (rank > 0) { /* post Receive from left */
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
} }
@ -96,15 +96,15 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm)
/* rank 0 post receive from the last node */ /* rank 0 post receive from the last node */
if (rank == 0) { if (rank == 0) {
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
} }
return MPI_SUCCESS; return MPI_SUCCESS;
err_hndl: err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err; return err;
} }
@ -131,13 +131,13 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (rank >= adjsize) { if (rank >= adjsize) {
/* send message to lower ranked node */ /* send message to lower ranked node */
err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, rank-adjsize, err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, rank-adjsize,
MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm)); MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
/* post receive from lower ranked node */ /* post receive from lower ranked node */
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, rank-adjsize, err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, rank-adjsize,
MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -145,7 +145,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
/* receive message from high level rank */ /* receive message from high level rank */
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, rank+adjsize, err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, rank+adjsize,
MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
} }
@ -160,8 +160,8 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (remote >= adjsize) continue; if (remote >= adjsize) continue;
err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, remote, MCA_COLL_BASE_TAG_BARRIER, err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, remote, MCA_COLL_BASE_TAG_BARRIER,
NULL, 0, MPI_BYTE, remote, MCA_COLL_BASE_TAG_BARRIER, NULL, 0, MPI_BYTE, remote, MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE, rank); comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
} }
@ -173,7 +173,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
/* send enter message to higher ranked node */ /* send enter message to higher ranked node */
err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, rank+adjsize, err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, rank+adjsize,
MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_SYNCHRONOUS, comm)); MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_SYNCHRONOUS, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
} }
@ -181,9 +181,9 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
return MPI_SUCCESS; return MPI_SUCCESS;
err_hndl: err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err; return err;
} }
@ -206,16 +206,16 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm)
from = (rank + size - distance)%size; from = (rank + size - distance)%size;
to = (rank + distance)%size; to = (rank + distance)%size;
err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, to, MCA_COLL_BASE_TAG_BARRIER, err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, to, MCA_COLL_BASE_TAG_BARRIER,
NULL, 0, MPI_BYTE, from, MCA_COLL_BASE_TAG_BARRIER, NULL, 0, MPI_BYTE, from, MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE, rank); comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
} }
return MPI_SUCCESS; return MPI_SUCCESS;
err_hndl: err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return err; return err;
} }
@ -233,13 +233,13 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm)
if (0==rank) { if (0==rank) {
err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER, err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER,
NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER, NULL, 0, MPI_BYTE, 1, MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE, rank); comm, MPI_STATUS_IGNORE, rank);
} }
else { else {
err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, err = ompi_coll_tuned_sendrecv_localcompleted (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE, rank); comm, MPI_STATUS_IGNORE, rank);
} }
return (err); return (err);
@ -334,39 +334,39 @@ int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_
int rc; int rc;
int max_alg = 5; int max_alg = 5;
ompi_coll_tuned_forced_max_algorithms[BARRIER] = max_alg; ompi_coll_tuned_forced_max_algorithms[BARRIER] = max_alg;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm_count", "barrier_algorithm_count",
"Number of barrier algorithms available", "Number of barrier algorithms available",
false, true, max_alg, NULL); false, true, max_alg, NULL);
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm", "barrier_algorithm",
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only", "Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only",
false, false, 0, NULL); false, false, 0, NULL);
return (MPI_SUCCESS); return (MPI_SUCCESS);
} }
int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm) int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced selected algorithm %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced selected algorithm %d",
comm->c_coll_selected_data->user_forced[BARRIER].algorithm)); comm->c_coll_selected_data->user_forced[BARRIER].algorithm));
switch (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) { switch (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm); case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm);
case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm); case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm);
case (2): return ompi_coll_tuned_barrier_intra_doublering (comm); case (2): return ompi_coll_tuned_barrier_intra_doublering (comm);
case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm); case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm);
case (4): return ompi_coll_tuned_barrier_intra_bruck (comm); case (4): return ompi_coll_tuned_barrier_intra_bruck (comm);
case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm); case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm);
/* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */ /* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
comm->c_coll_selected_data->user_forced[BARRIER].algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER])); comm->c_coll_selected_data->user_forced[BARRIER].algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */
@ -375,19 +375,19 @@ switch (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) {
int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, int algorithm, int faninout, int segsize) int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, int algorithm, int faninout, int segsize)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", algorithm, faninout)); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", algorithm, faninout));
switch (algorithm) { switch (algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm); case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm);
case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm); case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm);
case (2): return ompi_coll_tuned_barrier_intra_doublering (comm); case (2): return ompi_coll_tuned_barrier_intra_doublering (comm);
case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm); case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm);
case (4): return ompi_coll_tuned_barrier_intra_bruck (comm); case (4): return ompi_coll_tuned_barrier_intra_bruck (comm);
case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm); case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm);
/* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */ /* case (6): return ompi_coll_tuned_barrier_intra_bmtree_step (comm); */
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER])); algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */

Просмотреть файл

@ -31,10 +31,10 @@
int int
ompi_coll_tuned_bcast_intra_chain ( void *buff, int count, ompi_coll_tuned_bcast_intra_chain ( void *buff, int count,
struct ompi_datatype_t *datatype, struct ompi_datatype_t *datatype,
int root, int root,
struct ompi_communicator_t *comm, struct ompi_communicator_t *comm,
uint32_t segsize, int32_t chains ) uint32_t segsize, int32_t chains )
{ {
int err = 0, line, rank, size, segindex, i; int err = 0, line, rank, size, segindex, i;
int segcount; /* Number of elements sent with each segment */ int segcount; /* Number of elements sent with each segment */
@ -111,7 +111,7 @@ ompi_coll_tuned_bcast_intra_chain ( void *buff, int count,
/* set the buffer pointer */ /* set the buffer pointer */
tmpbuf = (char *)buff; tmpbuf = (char *)buff;
/* OPAL_OUTPUT((ompi_coll_tuned_stream,("%1d chain root %d num_segments %d\n", rank, root, num_segments); */ /* OPAL_OUTPUT((ompi_coll_tuned_stream,("%1d chain root %d num_segments %d\n", rank, root, num_segments); */
/* root code */ /* root code */
if( rank == root ) { if( rank == root ) {
@ -141,8 +141,8 @@ ompi_coll_tuned_bcast_intra_chain ( void *buff, int count,
*/ */
new_sendcount = sendcount = segcount; new_sendcount = sendcount = segcount;
err = MCA_PML_CALL(irecv( tmpbuf, sendcount, datatype, err = MCA_PML_CALL(irecv( tmpbuf, sendcount, datatype,
chain->chain_prev, MCA_COLL_BASE_TAG_BCAST, chain->chain_prev, MCA_COLL_BASE_TAG_BCAST,
comm, &base_req)); comm, &base_req));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
for( segindex = 1; segindex < num_segments; segindex++ ) { for( segindex = 1; segindex < num_segments; segindex++ ) {
@ -212,29 +212,29 @@ ompi_coll_tuned_bcast_intra_chain ( void *buff, int count,
int int
ompi_coll_tuned_bcast_intra_pipeline ( void *buffer, ompi_coll_tuned_bcast_intra_pipeline ( void *buffer,
int count, int count,
struct ompi_datatype_t *datatype, struct ompi_datatype_t *datatype,
int root, int root,
struct ompi_communicator_t *comm, struct ompi_communicator_t *comm,
uint32_t segsize ) uint32_t segsize )
{ {
int rank; /* remove when removing print statement */ int rank; /* remove when removing print statement */
rank = ompi_comm_rank(comm); /* remove when removing print statement */ rank = ompi_comm_rank(comm); /* remove when removing print statement */
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_pipeline rank %d root %d ss %5d", rank, root, segsize)); OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_pipeline rank %d root %d ss %5d", rank, root, segsize));
return ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype, root, comm, return ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype, root, comm,
segsize, 1 ); segsize, 1 );
} }
int int
ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
int count, int count,
struct ompi_datatype_t* datatype, struct ompi_datatype_t* datatype,
int root, int root,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
uint32_t segsize ) uint32_t segsize )
{ {
int err=0, line; int err=0, line;
int rank, size; int rank, size;
@ -307,7 +307,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
(segsize > counts[1] * type_size) ) { (segsize > counts[1] * type_size) ) {
/* call linear version here ! */ /* call linear version here ! */
return (ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype, return (ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype,
root, comm, segsize, 1 )); root, comm, segsize, 1 ));
} }
err = ompi_ddt_get_extent (datatype, &lb, &type_extent); err = ompi_ddt_get_extent (datatype, &lb, &type_extent);
@ -349,7 +349,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm)); MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
/* update tmp buffer */ /* update tmp buffer */
tmpbuf[i] += realsegsize[i]; tmpbuf[i] += realsegsize[i];
} }
} }
@ -448,10 +448,10 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
if ( (size%2) != 0 && rank != root) { if ( (size%2) != 0 && rank != root) {
err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype, err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype,
pair, MCA_COLL_BASE_TAG_BCAST, pair, MCA_COLL_BASE_TAG_BCAST,
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
pair, MCA_COLL_BASE_TAG_BCAST, pair, MCA_COLL_BASE_TAG_BCAST,
comm, MPI_STATUS_IGNORE, rank); comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} else if ( (size%2) == 0 ) { } else if ( (size%2) == 0 ) {
/* root sends right buffer to the last node */ /* root sends right buffer to the last node */
@ -472,17 +472,17 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
/* everyone else exchanges buffers */ /* everyone else exchanges buffers */
else { else {
err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype, err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype,
pair, MCA_COLL_BASE_TAG_BCAST, pair, MCA_COLL_BASE_TAG_BCAST,
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
pair, MCA_COLL_BASE_TAG_BCAST, pair, MCA_COLL_BASE_TAG_BCAST,
comm, MPI_STATUS_IGNORE, rank); comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} }
} }
return (MPI_SUCCESS); return (MPI_SUCCESS);
error_hndl: error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return (err); return (err);
} }
@ -491,11 +491,11 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
int int
ompi_coll_tuned_bcast_intra_bintree ( void* buffer, ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
int count, int count,
struct ompi_datatype_t* datatype, struct ompi_datatype_t* datatype,
int root, int root,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
uint32_t segsize ) uint32_t segsize )
{ {
int err=0, line, i; int err=0, line, i;
int rank, size; int rank, size;
@ -588,8 +588,8 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */ for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */
/* send data */ /* send data */
MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, MCA_PML_CALL(isend(tmpbuf, sendcount, datatype,
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} }
@ -639,8 +639,8 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */ for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */
/* send data */ /* send data */
MCA_PML_CALL(isend(tmpbuf, segcount, datatype, MCA_PML_CALL(isend(tmpbuf, segcount, datatype,
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} }
@ -661,8 +661,8 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */ for( i = 0; i < tree->tree_nextsize; i++ ) { /* send data to children */
MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, MCA_PML_CALL(isend(tmpbuf, sendcount, datatype,
tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} }
@ -692,7 +692,7 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
return (MPI_SUCCESS); return (MPI_SUCCESS);
error_hndl: error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return (err); return (err);
} }
@ -720,8 +720,8 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
*/ */
int int
ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count, ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
struct ompi_datatype_t *datatype, int root, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int i; int i;
int size; int size;
@ -735,7 +735,6 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_basic_linear rank %d root %d", rank, root)); OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_basic_linear rank %d root %d", rank, root));
/* Non-root receive the data. */ /* Non-root receive the data. */
if (rank != root) { if (rank != root) {
@ -800,67 +799,67 @@ int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mc
int rc; int rc;
int max_alg = 5; int max_alg = 5;
ompi_coll_tuned_forced_max_algorithms[BCAST] = max_alg; ompi_coll_tuned_forced_max_algorithms[BCAST] = max_alg;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_count", "bcast_algorithm_count",
"Number of bcast algorithms available", "Number of bcast algorithms available",
false, true, max_alg, NULL); false, true, max_alg, NULL);
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm", "bcast_algorithm",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree.", "Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree.",
false, false, 0, NULL); false, false, 0, NULL);
mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_segmentsize", "bcast_algorithm_segmentsize",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", "Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL); false, false, 0, NULL);
mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_tree_fanout", "bcast_algorithm_tree_fanout",
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", "Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false, false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */ ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL); NULL);
mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_chain_fanout", "bcast_algorithm_chain_fanout",
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", "Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false, false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */ ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL); NULL);
return (MPI_SUCCESS); return (MPI_SUCCESS);
} }
int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count, int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
int root, int root,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d",
comm->c_coll_selected_data->user_forced[BCAST].algorithm)); comm->c_coll_selected_data->user_forced[BCAST].algorithm));
switch (comm->c_coll_selected_data->user_forced[BCAST].algorithm) { switch (comm->c_coll_selected_data->user_forced[BCAST].algorithm) {
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm); case (0): return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm);
case (1): return ompi_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm); case (1): return ompi_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm);
case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm,
comm->c_coll_selected_data->user_forced[BCAST].segsize, comm->c_coll_selected_data->user_forced[BCAST].segsize,
comm->c_coll_selected_data->user_forced[BCAST].chain_fanout ); comm->c_coll_selected_data->user_forced[BCAST].chain_fanout );
case (3): return ompi_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm, case (3): return ompi_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm,
comm->c_coll_selected_data->user_forced[BCAST].segsize); comm->c_coll_selected_data->user_forced[BCAST].segsize);
case (4): return ompi_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm, case (4): return ompi_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm,
comm->c_coll_selected_data->user_forced[BCAST].segsize); comm->c_coll_selected_data->user_forced[BCAST].segsize);
case (5): return ompi_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm, case (5): return ompi_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm,
comm->c_coll_selected_data->user_forced[BCAST].segsize); comm->c_coll_selected_data->user_forced[BCAST].segsize);
/* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm, /* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm,
* ompi_coll_tuned_bcast_forced_segsize); */ * ompi_coll_tuned_bcast_forced_segsize); */
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
comm->c_coll_selected_data->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST])); comm->c_coll_selected_data->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */
@ -868,27 +867,27 @@ switch (comm->c_coll_selected_data->user_forced[BCAST].algorithm) {
int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count, int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
int root, int root,
struct ompi_communicator_t *comm, struct ompi_communicator_t *comm,
int algorithm, int faninout, int segsize) int algorithm, int faninout, int segsize)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize)); algorithm, faninout, segsize));
switch (algorithm) { switch (algorithm) {
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm); case (0): return ompi_coll_tuned_bcast_intra_dec_fixed (buf, count, dtype, root, comm);
case (1): return ompi_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm); case (1): return ompi_coll_tuned_bcast_intra_basic_linear (buf, count, dtype, root, comm);
case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, segsize, faninout ); case (2): return ompi_coll_tuned_bcast_intra_chain (buf, count, dtype, root, comm, segsize, faninout );
case (3): return ompi_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm, segsize); case (3): return ompi_coll_tuned_bcast_intra_pipeline (buf, count, dtype, root, comm, segsize);
case (4): return ompi_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm, segsize); case (4): return ompi_coll_tuned_bcast_intra_split_bintree (buf, count, dtype, root, comm, segsize);
case (5): return ompi_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm, segsize); case (5): return ompi_coll_tuned_bcast_intra_bintree (buf, count, dtype, root, comm, segsize);
/* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm, /* case (6): return ompi_coll_tuned_bcast_intra_bmtree (buf, count, dtype, root, comm,
* segsize); */ * segsize); */
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST])); algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */

Просмотреть файл

@ -72,48 +72,48 @@ mca_coll_tuned_component_t mca_coll_tuned_component = {
{ {
/* First, the mca_component_t struct containing meta information /* First, the mca_component_t struct containing meta information
about the component itself */ about the component itself */
{ {
/* Indicate that we are a coll v1.0.0 component (which also implies a /* Indicate that we are a coll v1.0.0 component (which also implies a
specific MCA version) */ specific MCA version) */
MCA_COLL_BASE_VERSION_1_0_0, MCA_COLL_BASE_VERSION_1_0_0,
/* Component name and version */ /* Component name and version */
"tuned", "tuned",
OMPI_MAJOR_VERSION, OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION, OMPI_RELEASE_VERSION,
/* Component open and close functions */ /* Component open and close functions */
tuned_open, tuned_open,
tuned_close tuned_close
}, },
/* Next the MCA v1.0.0 component meta data */ /* Next the MCA v1.0.0 component meta data */
{ {
/* Whether the component is checkpointable or not */ /* Whether the component is checkpointable or not */
true true
}, },
/* Initialization / querying functions */ /* Initialization / querying functions */
ompi_coll_tuned_init_query, ompi_coll_tuned_init_query,
ompi_coll_tuned_comm_query, ompi_coll_tuned_comm_query,
NULL NULL
}, },
/* priority of the module */ /* priority of the module */
0, 0,
/* Tuned component specific information */ /* Tuned component specific information */
/* Note some of this WAS in the module */ /* Note some of this WAS in the module */
NULL /* ompi_coll_alg_rule_t ptr */ NULL /* ompi_coll_alg_rule_t ptr */
}; };
@ -122,7 +122,7 @@ static int tuned_open(void)
{ {
int param; int param;
/* ompi_coll_tuned_component_t *ct = &ompi_coll_tuned_component; */ /* ompi_coll_tuned_component_t *ct = &ompi_coll_tuned_component; */
/* Use a low priority, but allow other components to be lower */ /* Use a low priority, but allow other components to be lower */
@ -149,13 +149,13 @@ static int tuned_open(void)
/* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */ /* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */
if (ompi_coll_tuned_use_dynamic_rules) { if (ompi_coll_tuned_use_dynamic_rules) {
/* char *default_name; */ /* char *default_name; */
/* asprintf(&default_name, "~/.openmpi/openmpi-coll-tuned-params.conf"); */ /* asprintf(&default_name, "~/.openmpi/openmpi-coll-tuned-params.conf"); */
mca_base_param_reg_string(&mca_coll_tuned_component.super.collm_version, mca_base_param_reg_string(&mca_coll_tuned_component.super.collm_version,
"dynamic_rules_filename", "dynamic_rules_filename",
"Filename of configuration file that contains the dynamic (@runtime) decision function rules", "Filename of configuration file that contains the dynamic (@runtime) decision function rules",
false, false, ompi_coll_tuned_dynamic_rules_filename, false, false, ompi_coll_tuned_dynamic_rules_filename,
&ompi_coll_tuned_dynamic_rules_filename); &ompi_coll_tuned_dynamic_rules_filename);
} }
/* some initial guesses at topology parameters */ /* some initial guesses at topology parameters */
@ -176,7 +176,7 @@ static int tuned_open(void)
int verbose; int verbose;
mca_base_param_lookup_int(param, &verbose); mca_base_param_lookup_int(param, &verbose);
if (verbose > 0) { if (verbose > 0) {
ompi_coll_tuned_stream = opal_output_open(NULL); ompi_coll_tuned_stream = opal_output_open(NULL);
} }
} }
@ -190,7 +190,7 @@ static int tuned_open(void)
if (ompi_coll_tuned_use_dynamic_rules) { if (ompi_coll_tuned_use_dynamic_rules) {
ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]); ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]); ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
/* ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]); */ /* ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]); */
ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]); ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]);
ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]); ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]);
ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]); ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]);

Просмотреть файл

@ -54,9 +54,9 @@
*/ */
int int
ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count, ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_op_t *op,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_dynamic")); OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_dynamic"));
@ -64,7 +64,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
/* check to see if we have some filebased rules */ /* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[ALLREDUCE]) { if (comm->c_coll_selected_data->com_rules[ALLREDUCE]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */ /* we do, so calc the message size or what ever we need and use this for the evaluation */
int alg, faninout, segsize; int alg, faninout, segsize;
size_t dsize; size_t dsize;
@ -72,20 +72,18 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
dsize *= count; dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLREDUCE], alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLREDUCE],
dsize, &faninout, &segsize); dsize, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */ if (alg) { /* we have found a valid choice from the file based rules for this message size */
return ompi_coll_tuned_allreduce_intra_do_this (sbuf, rbuf, count, dtype, op, comm, return ompi_coll_tuned_allreduce_intra_do_this (sbuf, rbuf, count, dtype, op, comm,
alg, faninout, segsize); alg, faninout, segsize);
} /* found a method */ } /* found a method */
} /*end if any com rules to check */ } /*end if any com rules to check */
if (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) { if (comm->c_coll_selected_data->user_forced[ALLREDUCE].algorithm) {
return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, comm); return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op, comm);
} }
else { return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm);
}
} }
/* /*
@ -97,10 +95,10 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
*/ */
int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, struct ompi_datatype_t *sdtype,
void* rbuf, int rcount, void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_dynamic")); OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_dynamic"));
@ -108,7 +106,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
/* check to see if we have some filebased rules */ /* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[ALLTOALL]) { if (comm->c_coll_selected_data->com_rules[ALLTOALL]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */ /* we do, so calc the message size or what ever we need and use this for the evaluation */
int comsize; int comsize;
int alg, faninout, segsize; int alg, faninout, segsize;
size_t dsize; size_t dsize;
@ -118,11 +116,11 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
dsize *= comsize * scount; dsize *= comsize * scount;
alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLTOALL], alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[ALLTOALL],
dsize, &faninout, &segsize); dsize, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */ if (alg) { /* we have found a valid choice from the file based rules for this message size */
return ompi_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, return ompi_coll_tuned_alltoall_intra_do_this (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm,
alg, faninout, segsize); alg, faninout, segsize);
} /* found a method */ } /* found a method */
} /*end if any com rules to check */ } /*end if any com rules to check */
@ -130,9 +128,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
if (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) { if (comm->c_coll_selected_data->user_forced[ALLTOALL].algorithm) {
return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
} }
else { return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
}
} }
/* /*
@ -150,25 +146,22 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
/* check to see if we have some filebased rules */ /* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[BARRIER]) { if (comm->c_coll_selected_data->com_rules[BARRIER]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */ /* we do, so calc the message size or what ever we need and use this for the evaluation */
int alg, faninout, segsize; int alg, faninout, segsize;
alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BARRIER], alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BARRIER],
0, &faninout, &segsize); 0, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */ if (alg) { /* we have found a valid choice from the file based rules for this message size */
return ompi_coll_tuned_barrier_intra_do_this (comm, return ompi_coll_tuned_barrier_intra_do_this (comm,
alg, faninout, segsize); alg, faninout, segsize);
} /* found a method */ } /* found a method */
} /*end if any com rules to check */ } /*end if any com rules to check */
if (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) { if (comm->c_coll_selected_data->user_forced[BARRIER].algorithm) {
return ompi_coll_tuned_barrier_intra_do_forced (comm); return ompi_coll_tuned_barrier_intra_do_forced (comm);
} }
else { return ompi_coll_tuned_barrier_intra_dec_fixed (comm);
return ompi_coll_tuned_barrier_intra_dec_fixed (comm);
}
} }
/* /*
@ -179,8 +172,8 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm)
* Returns: - MPI_SUCCESS or error code (passed from the bcast implementation) * Returns: - MPI_SUCCESS or error code (passed from the bcast implementation)
*/ */
int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count, int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
struct ompi_datatype_t *datatype, int root, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic")); OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic"));
@ -188,7 +181,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
/* check to see if we have some filebased rules */ /* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[BCAST]) { if (comm->c_coll_selected_data->com_rules[BCAST]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */ /* we do, so calc the message size or what ever we need and use this for the evaluation */
int alg, faninout, segsize; int alg, faninout, segsize;
size_t dsize; size_t dsize;
@ -196,7 +189,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
dsize *= count; dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BCAST], alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[BCAST],
dsize, &faninout, &segsize); dsize, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */ if (alg) { /* we have found a valid choice from the file based rules for this message size */
return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root, comm, return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root, comm,
@ -206,12 +199,9 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
if (comm->c_coll_selected_data->user_forced[BCAST].algorithm) { if (comm->c_coll_selected_data->user_forced[BCAST].algorithm) {
return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm); return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root, comm);
} }
else { return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root, comm);
return ompi_coll_tuned_bcast_intra_dec_fixed (buff, count, datatype, root, comm);
}
} }
/* /*
@ -223,9 +213,9 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
* *
*/ */
int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf, int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
int count, struct ompi_datatype_t* datatype, int count, struct ompi_datatype_t* datatype,
struct ompi_op_t* op, int root, struct ompi_op_t* op, int root,
struct ompi_communicator_t* comm) struct ompi_communicator_t* comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic")); OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic"));
@ -233,7 +223,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
/* check to see if we have some filebased rules */ /* check to see if we have some filebased rules */
if (comm->c_coll_selected_data->com_rules[REDUCE]) { if (comm->c_coll_selected_data->com_rules[REDUCE]) {
/* we do, so calc the message size or what ever we need and use this for the evaluation */ /* we do, so calc the message size or what ever we need and use this for the evaluation */
int alg, faninout, segsize; int alg, faninout, segsize;
size_t dsize; size_t dsize;
@ -241,20 +231,17 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
dsize *= count; dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[REDUCE], alg = ompi_coll_tuned_get_target_method_params (comm->c_coll_selected_data->com_rules[REDUCE],
dsize, &faninout, &segsize); dsize, &faninout, &segsize);
if (alg) { /* we have found a valid choice from the file based rules for this message size */ if (alg) { /* we have found a valid choice from the file based rules for this message size */
return ompi_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype, op, root, comm, return ompi_coll_tuned_reduce_intra_do_this (sendbuf, recvbuf, count, datatype, op, root, comm,
alg, faninout, segsize); alg, faninout, segsize);
} /* found a method */ } /* found a method */
} /*end if any com rules to check */ } /*end if any com rules to check */
if (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) { if (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) {
return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm); return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype, op, root, comm);
} }
else { return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype, op, root, comm);
return ompi_coll_tuned_reduce_intra_dec_fixed (sendbuf, recvbuf, count, datatype, op, root, comm);
}
} }

Просмотреть файл

@ -37,21 +37,13 @@
*/ */
int int
ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count, ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_op_t *op,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
/* int size; */
/* int contig; */
/* int dsize; */
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_fixed")); OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_fixed"));
/* size = ompi_comm_size(comm); */
return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm)); return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm));
} }
/* /*
@ -63,16 +55,13 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
*/ */
int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount, int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, struct ompi_datatype_t *sdtype,
void* rbuf, int rcount, void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int comsize; int comsize, rank, err;
int rank; size_t dsize, total_dsize;
int err;
unsigned long dsize;
unsigned long total_dsize;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_fixed")); OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_fixed"));
@ -87,21 +76,19 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
/* else we need data size for decision function */ /* else we need data size for decision function */
err = ompi_ddt_get_size (sdtype, &dsize); err = ompi_ddt_get_size (sdtype, &dsize);
if (err != MPI_SUCCESS) { if (err != MPI_SUCCESS) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank));
return (err); return (err);
} }
total_dsize = dsize * scount * (unsigned long)comsize; /* needed for decision */ total_dsize = dsize * scount * comsize; /* needed for decision */
if (comsize >= 12 && total_dsize <= 768) { if (comsize >= 12 && total_dsize <= 768) {
return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
} }
else if (total_dsize <= 131072) { if (total_dsize <= 131072) {
return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm); return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
} }
else { return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm);
}
} }
@ -122,11 +109,10 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm)
if (2==comsize) if (2==comsize)
return ompi_coll_tuned_barrier_intra_two_procs(comm); return ompi_coll_tuned_barrier_intra_two_procs(comm);
else /* return ompi_coll_tuned_barrier_intra_doublering(comm); */
/* return ompi_coll_tuned_barrier_intra_doublering(comm); */
return ompi_coll_tuned_barrier_intra_recursivedoubling(comm); return ompi_coll_tuned_barrier_intra_recursivedoubling(comm);
/* return ompi_coll_tuned_barrier_intra_bruck(comm); */ /* return ompi_coll_tuned_barrier_intra_bruck(comm); */
/* return ompi_coll_tuned_barrier_intra_linear(comm); */ /* return ompi_coll_tuned_barrier_intra_linear(comm); */
} }
@ -139,16 +125,12 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm)
* Returns: - MPI_SUCCESS or error code (passed from the bcast implementation) * Returns: - MPI_SUCCESS or error code (passed from the bcast implementation)
*/ */
int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count, int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
struct ompi_datatype_t *datatype, int root, struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
int comsize; int comsize, rank, err;
int rank;
int err;
unsigned long msgsize;
unsigned long dsize;
int segsize = 0; int segsize = 0;
size_t msgsize, dsize;
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_dec_fixed")); OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_dec_fixed"));
@ -158,7 +140,7 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
/* else we need data size for decision function */ /* else we need data size for decision function */
err = ompi_ddt_get_size (datatype, &dsize); err = ompi_ddt_get_size (datatype, &dsize);
if (err != MPI_SUCCESS) { if (err != MPI_SUCCESS) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank));
return (err); return (err);
} }
@ -166,34 +148,29 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
/* this is based on gige measurements */ /* this is based on gige measurements */
if ((comsize < 4)) { if (comsize < 4) {
segsize = 0;
return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm); return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm);
} }
else if (comsize == 4) { if (comsize == 4) {
if (msgsize < 524288) segsize = 0; if (msgsize < 524288) segsize = 0;
else msgsize = 16384; else segsize = 16384;
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize); return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize);
} }
else if (comsize > 4 && comsize <= 8 && msgsize < 4096) { if (comsize <= 8 && msgsize < 4096) {
segsize = 0; return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm);
return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm);
} }
else if (comsize > 8 && msgsize >= 32768 && msgsize < 524288) { if (comsize > 8 && msgsize >= 32768 && msgsize < 524288) {
segsize = 16384; segsize = 16384;
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize); return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize);
} }
else if (comsize > 4 && msgsize >= 524288) { if (msgsize >= 524288) {
segsize = 16384; segsize = 16384;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, segsize); return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, segsize);
} }
else { segsize = 0;
segsize = 0; /* once tested can swap this back in */
/* once tested can swap this back in */ /* return ompi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */
/* return ompi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */ return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize);
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, segsize);
}
} }
/* /*
@ -205,19 +182,12 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
* *
*/ */
int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf, int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
int count, struct ompi_datatype_t* datatype, int count, struct ompi_datatype_t* datatype,
struct ompi_op_t* op, int root, struct ompi_op_t* op, int root,
struct ompi_communicator_t* comm) struct ompi_communicator_t* comm)
{ {
int comsize; int comsize, rank, err, segsize = 0, fanout = 0;
int rank; size_t msgsize, dsize;
int err;
/* int contig; */
unsigned long msgsize;
unsigned long dsize;
int segsize = 0;
/* int fanout = 0; */
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed")); OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed"));
@ -227,39 +197,33 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
/* need data size for decision function */ /* need data size for decision function */
err = ompi_ddt_get_size (datatype, &dsize); err = ompi_ddt_get_size (datatype, &dsize);
if (err != MPI_SUCCESS) { if (err != MPI_SUCCESS) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank)); OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,__LINE__,err,rank));
return (err); return (err);
} }
msgsize = dsize * (unsigned long)count; /* needed for decision */ msgsize = dsize * count; /* needed for decision */
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm); /* for small messages use linear algorithm */
#ifdef coconuts if (msgsize <= 4096) {
/* for small messages use linear algorithm */
if (msgsize <= 4096) {
segsize = 0; segsize = 0;
fanout = size-1; fanout = comsize - 1;
/* when linear implemented or taken from basic put here, right now using chain as a linear system */ /* when linear implemented or taken from basic put here, right now using chain as a linear system */
/* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */ /* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm); return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm);
/* return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */ /* return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */
} else if (msgsize <= 65536 ) { }
segsize = 32768; if (msgsize < 524288) {
fanout = 8; if (msgsize <= 65536 ) {
segsize = 32768;
fanout = 8;
} else {
segsize = 1024;
fanout = comsize/2;
}
/* later swap this for a binary tree */
/* fanout = 2; */
return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout);
} else if (msgsize < 524288) { }
segsize = 1024; segsize = 1024;
fanout = size/2; return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, segsize);
/* later swap this for a binary tree */
/* fanout = 2; */
return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout);
} else
#endif
{
segsize = 1024;
return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, segsize);
}
} }

Просмотреть файл

@ -60,235 +60,235 @@ static int fileline=0; /* used for verbose error messages */
int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules, int n_collectives) int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules, int n_collectives)
{ {
FILE *fptr = (FILE*) NULL; FILE *fptr = (FILE*) NULL;
int X; int X;
int CI; int CI;
int NCS; int NCS;
int CS; int CS;
int NMS; int NMS;
int MS, ALG, FANINOUT, SS; int MS, ALG, FANINOUT, SS;
int x, ncs, nms; int x, ncs, nms;
ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */ ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */
/* individual pointers to sections of rules */ /* individual pointers to sections of rules */
ompi_coll_alg_rule_t *alg_p = (ompi_coll_alg_rule_t*) NULL; ompi_coll_alg_rule_t *alg_p = (ompi_coll_alg_rule_t*) NULL;
ompi_coll_com_rule_t *com_p = (ompi_coll_com_rule_t*) NULL; ompi_coll_com_rule_t *com_p = (ompi_coll_com_rule_t*) NULL;
ompi_coll_msg_rule_t *msg_p = (ompi_coll_msg_rule_t*) NULL; ompi_coll_msg_rule_t *msg_p = (ompi_coll_msg_rule_t*) NULL;
/* stats info */ /* stats info */
int total_alg_count = 0; int total_alg_count = 0;
int total_com_count = 0; int total_com_count = 0;
int total_msg_count = 0; int total_msg_count = 0;
if (!fname) { if (!fname) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n")); OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table configuration file for tuned collectives... ignoring!\n"));
return (-1); return (-1);
} }
if (!rules) { if (!rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table result ptr!... ignoring!\n")); OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave NULL as rule table result ptr!... ignoring!\n"));
return (-2); return (-2);
} }
if (n_collectives<1) { if (n_collectives<1) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... ignoring!\n", n_collectives)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Gave %d as max number of collectives in the rule table configuration file for tuned collectives!... ignoring!\n", n_collectives));
return (-3); return (-3);
} }
fptr = fopen (fname, "r"); fptr = fopen (fname, "r");
if (!fptr) { if (!fptr) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"cannot read rules file [%s]\n", fname)); OPAL_OUTPUT((ompi_coll_tuned_stream,"cannot read rules file [%s]\n", fname));
goto on_file_error; goto on_file_error;
} }
/* make space and init the algorithm rules for each of the n_collectives MPI collectives */ /* make space and init the algorithm rules for each of the n_collectives MPI collectives */
alg_rules = ompi_coll_tuned_mk_alg_rules (n_collectives); alg_rules = ompi_coll_tuned_mk_alg_rules (n_collectives);
X = getnext(fptr); X = getnext(fptr);
if (X<0) { if (X<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline));
goto on_file_error; goto on_file_error;
} }
if (X>n_collectives) { if (X>n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %d is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline));
goto on_file_error; goto on_file_error;
} }
for (x=0;x<X;x++) { /* for each collective */ for (x=0;x<X;x++) { /* for each collective */
CI = getnext (fptr); CI = getnext (fptr);
if (CI<0) { if (CI<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read next Collective id in configuration file around line %d\n", fileline));
goto on_file_error;
}
if (CI>=n_collectives) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %d is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
goto on_file_error;
}
if (alg_rules[CI].alg_rule_id != CI) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %d\n", CI));
ompi_coll_tuned_free_all_rules (*rules, n_collectives);
return (-4);
}
alg_p = &alg_rules[CI];
alg_p->alg_rule_id = CI;
alg_p->n_com_sizes = 0;
alg_p->com_rules = (ompi_coll_com_rule_t *) NULL;
NCS = getnext (fptr);
if (NCS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %d at around line %d\n", CI, fileline));
goto on_file_error;
}
alg_p->n_com_sizes = NCS;
alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI);
for (ncs=0;ncs<NCS;ncs++) { /* for each comm size */
com_p = &(alg_p->com_rules[ncs]);
CS = getnext (fptr);
if (CS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
goto on_file_error; goto on_file_error;
} }
if (CI>=n_collectives) {
com_p->mpi_comsize = CS; OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %d is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline));
NMS = getnext (fptr);
if (NMS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
goto on_file_error; goto on_file_error;
} }
com_p->n_msg_sizes = NMS; if (alg_rules[CI].alg_rule_id != CI) {
com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS); OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %d\n", CI));
ompi_coll_tuned_free_all_rules (*rules, n_collectives);
return (-4);
}
msg_p = com_p->msg_rules; alg_p = &alg_rules[CI];
for (nms=0;nms<NMS;nms++) { /* for each msg size */ alg_p->alg_rule_id = CI;
alg_p->n_com_sizes = 0;
alg_p->com_rules = (ompi_coll_com_rule_t *) NULL;
msg_p = &(com_p->msg_rules[nms]); NCS = getnext (fptr);
if (NCS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %d at around line %d\n", CI, fileline));
goto on_file_error;
}
MS = getnext (fptr); alg_p->n_com_sizes = NCS;
if (MS<0) { alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI);
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
for (ncs=0;ncs<NCS;ncs++) { /* for each comm size */
com_p = &(alg_p->com_rules[ncs]);
CS = getnext (fptr);
if (CS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
goto on_file_error; goto on_file_error;
} }
msg_p->msg_size = MS;
ALG = getnext (fptr); com_p->mpi_comsize = CS;
if (ALG<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); NMS = getnext (fptr);
if (NMS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %d com rule %d at around line %d\n", CI, ncs, fileline));
goto on_file_error; goto on_file_error;
} }
msg_p->result_alg = ALG;
FANINOUT = getnext (fptr); com_p->n_msg_sizes = NMS;
if (FANINOUT<0) { com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS);
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_topo_faninout = FANINOUT;
SS = getnext (fptr); msg_p = com_p->msg_rules;
if (SS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_segsize = SS;
if (!nms && MS) { for (nms=0;nms<NMS;nms++) { /* for each msg size */
OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %d for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
goto on_file_error;
}
total_msg_count++; msg_p = &(com_p->msg_rules[nms]);
} /* msg size */ MS = getnext (fptr);
if (MS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->msg_size = MS;
total_com_count++; ALG = getnext (fptr);
if (ALG<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_alg = ALG;
} /* comm size */ FANINOUT = getnext (fptr);
if (FANINOUT<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_topo_faninout = FANINOUT;
total_alg_count++; SS = getnext (fptr);
if (SS<0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %d com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline));
goto on_file_error;
}
msg_p->result_segsize = SS;
} /* per collective */ if (!nms && MS) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %d for collective ID %d com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline));
goto on_file_error;
}
fclose (fptr); total_msg_count++;
OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n")); } /* msg size */
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Lines in configuration file read\t\t: %5d\n", fileline));
/* return the rules to the caller */ total_com_count++;
*rules = alg_rules;
return (total_alg_count); } /* comm size */
total_alg_count++;
} /* per collective */
fclose (fptr);
OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Lines in configuration file read\t\t: %5d\n", fileline));
/* return the rules to the caller */
*rules = alg_rules;
return (total_alg_count);
on_file_error: on_file_error:
/* here we close out the file and delete any memory allocated nicely */ /* here we close out the file and delete any memory allocated nicely */
/* we return back a verbose message and a count of -1 algorithms read */ /* we return back a verbose message and a count of -1 algorithms read */
/* draconian but its better than having a bad collective decision table */ /* draconian but its better than having a bad collective decision table */
OPAL_OUTPUT((ompi_coll_tuned_stream,"read_rules_config_file: bad configure file [%s]. Read afar as line %d\n", fname, fileline)); OPAL_OUTPUT((ompi_coll_tuned_stream,"read_rules_config_file: bad configure file [%s]. Read afar as line %d\n", fname, fileline));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Ignoring user supplied tuned collectives configuration decision file.\n")); OPAL_OUTPUT((ompi_coll_tuned_stream,"Ignoring user supplied tuned collectives configuration decision file.\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Switching back to [compiled in] fixed decision table.\n")); OPAL_OUTPUT((ompi_coll_tuned_stream,"Switching back to [compiled in] fixed decision table.\n"));
OPAL_OUTPUT((ompi_coll_tuned_stream,"Fix errors as listed above and try again.\n")); OPAL_OUTPUT((ompi_coll_tuned_stream,"Fix errors as listed above and try again.\n"));
/* deallocate memory if allocated */ /* deallocate memory if allocated */
if (alg_rules) ompi_coll_tuned_free_all_rules (alg_rules, n_collectives); if (alg_rules) ompi_coll_tuned_free_all_rules (alg_rules, n_collectives);
/* close file */ /* close file */
if (fptr) fclose (fptr); if (fptr) fclose (fptr);
*rules = (ompi_coll_alg_rule_t*) NULL; *rules = (ompi_coll_alg_rule_t*) NULL;
return (-1); return (-1);
} }
static int getnext (FILE *fptr) static int getnext (FILE *fptr)
{ {
int val; int val;
int rc; int rc;
char trash; char trash;
do { do {
rc = fscanf(fptr, "%d", &val); rc = fscanf(fptr, "%d", &val);
if (rc==EOF) return (MYEOF); if (rc==EOF) return (MYEOF);
if (1==rc) return (val); if (1==rc) return (val);
else { else {
rc = fread(&trash, 1, 1, fptr); rc = fread(&trash, 1, 1, fptr);
if ('\n'==trash) fileline++; if ('\n'==trash) fileline++;
if ('#'==trash) skiptonewline (fptr); if ('#'==trash) skiptonewline (fptr);
} }
} while (1); } while (1);
return rc; return rc;
} }
static void skiptonewline (FILE *fptr) static void skiptonewline (FILE *fptr)
{ {
char val; char val;
int rc; int rc;
do { do {
rc = fread(&val, 1, 1, fptr); rc = fread(&val, 1, 1, fptr);
if (0==rc) return; if (0==rc) return;
if ((1==rc)&&('\n'==val)) { if ((1==rc)&&('\n'==val)) {
fileline++; fileline++;
return; return;
} }
} while (1); } while (1);
} }

Просмотреть файл

@ -41,36 +41,36 @@
ompi_coll_alg_rule_t* ompi_coll_tuned_mk_alg_rules (int n_alg) ompi_coll_alg_rule_t* ompi_coll_tuned_mk_alg_rules (int n_alg)
{ {
int i; int i;
ompi_coll_alg_rule_t* alg_rules; ompi_coll_alg_rule_t* alg_rules;
alg_rules = (ompi_coll_alg_rule_t *) calloc (n_alg, sizeof (ompi_coll_alg_rule_t)); alg_rules = (ompi_coll_alg_rule_t *) calloc (n_alg, sizeof (ompi_coll_alg_rule_t));
if (!alg_rules) return (alg_rules); if (!alg_rules) return (alg_rules);
/* set all we can at this point */ /* set all we can at this point */
for (i=0;i<n_alg;i++) { for (i=0;i<n_alg;i++) {
alg_rules[i].alg_rule_id = i; alg_rules[i].alg_rule_id = i;
} }
return (alg_rules); return (alg_rules);
} }
ompi_coll_com_rule_t* ompi_coll_tuned_mk_com_rules (int n_com_rules, int alg_rule_id) ompi_coll_com_rule_t* ompi_coll_tuned_mk_com_rules (int n_com_rules, int alg_rule_id)
{ {
int i; int i;
ompi_coll_com_rule_t * com_rules; ompi_coll_com_rule_t * com_rules;
com_rules = (ompi_coll_com_rule_t *) calloc (n_com_rules, sizeof (ompi_coll_com_rule_t)); com_rules = (ompi_coll_com_rule_t *) calloc (n_com_rules, sizeof (ompi_coll_com_rule_t));
if (!com_rules) return (com_rules); if (!com_rules) return (com_rules);
for (i=0;i<n_com_rules;i++) { for (i=0;i<n_com_rules;i++) {
com_rules[i].mpi_comsize = 0; /* unknown */ com_rules[i].mpi_comsize = 0; /* unknown */
com_rules[i].alg_rule_id = alg_rule_id; com_rules[i].alg_rule_id = alg_rule_id;
com_rules[i].com_rule_id = i; com_rules[i].com_rule_id = i;
com_rules[i].n_msg_sizes = 0; /* unknown */ com_rules[i].n_msg_sizes = 0; /* unknown */
com_rules[i].msg_rules = (ompi_coll_msg_rule_t *) NULL; com_rules[i].msg_rules = (ompi_coll_msg_rule_t *) NULL;
} }
return (com_rules); return (com_rules);
} }
@ -83,14 +83,14 @@ ompi_coll_msg_rule_t* ompi_coll_tuned_mk_msg_rules (int n_msg_rules, int alg_rul
if (!msg_rules) return (msg_rules); if (!msg_rules) return (msg_rules);
for (i=0;i<n_msg_rules;i++) { for (i=0;i<n_msg_rules;i++) {
msg_rules[i].mpi_comsize = mpi_comsize; msg_rules[i].mpi_comsize = mpi_comsize;
msg_rules[i].alg_rule_id = alg_rule_id; msg_rules[i].alg_rule_id = alg_rule_id;
msg_rules[i].com_rule_id = com_rule_id; msg_rules[i].com_rule_id = com_rule_id;
msg_rules[i].msg_rule_id = i; msg_rules[i].msg_rule_id = i;
msg_rules[i].msg_size = 0; /* unknown */ msg_rules[i].msg_size = 0; /* unknown */
msg_rules[i].result_alg = 0; /* unknown */ msg_rules[i].result_alg = 0; /* unknown */
msg_rules[i].result_topo_faninout = 0; /* unknown */ msg_rules[i].result_topo_faninout = 0; /* unknown */
msg_rules[i].result_segsize = 0; /* unknown */ msg_rules[i].result_segsize = 0; /* unknown */
} }
return (msg_rules); return (msg_rules);
} }
@ -104,89 +104,89 @@ ompi_coll_msg_rule_t* ompi_coll_tuned_mk_msg_rules (int n_msg_rules, int alg_rul
/*
 * Write one message-size rule to the tuned output stream: first its
 * identifying ids, then the message size and the algorithm, fan in/out
 * and segment size it maps to.
 *
 * Returns 0, or -1 when msg_p is NULL.
 */
int ompi_coll_tuned_dump_msg_rule (ompi_coll_msg_rule_t* msg_p)
{
    if (NULL == msg_p) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"Message rule was a NULL ptr?!\n"));
        return (-1);
    }

    /* where the rule lives in the table */
    OPAL_OUTPUT((ompi_coll_tuned_stream,"alg_id %3d\tcom_id %3d\tcom_size %3d\tmsg_id %3d\t",
                 msg_p->alg_rule_id, msg_p->com_rule_id,
                 msg_p->mpi_comsize, msg_p->msg_rule_id));

    /* what the rule selects */
    OPAL_OUTPUT((ompi_coll_tuned_stream,"msg_size %6d -> algorithm %2d\ttopo in/out %2d\tsegsize %5ld\n",
                 msg_p->msg_size, msg_p->result_alg,
                 msg_p->result_topo_faninout, msg_p->result_segsize));

    return (0);
}
/*
 * Write one communicator-size rule to the tuned output stream, followed
 * by each of its message-size rules.
 *
 * Returns 0, or -1 when com_p is NULL.
 */
int ompi_coll_tuned_dump_com_rule (ompi_coll_com_rule_t* com_p)
{
    int k;

    if (NULL == com_p) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"Com rule was a NULL ptr?!\n"));
        return (-1);
    }

    OPAL_OUTPUT((ompi_coll_tuned_stream, "alg_id %3d\tcom_id %3d\tcom_size %3d\t", com_p->alg_rule_id, com_p->com_rule_id, com_p->mpi_comsize));

    if (0 == com_p->n_msg_sizes) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"no msgsizes defined\n"));
        return (0);
    }

    OPAL_OUTPUT((ompi_coll_tuned_stream,"number of message sizes %3d\n", com_p->n_msg_sizes));

    k = 0;
    while (k < com_p->n_msg_sizes) {
        ompi_coll_tuned_dump_msg_rule (com_p->msg_rules + k);
        k++;
    }

    return (0);
}
/*
 * Write one algorithm rule to the tuned output stream, followed by each
 * of its communicator-size rules.
 *
 * Returns 0, or -1 when alg_p is NULL.
 */
int ompi_coll_tuned_dump_alg_rule (ompi_coll_alg_rule_t* alg_p)
{
    int k;

    if (NULL == alg_p) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n"));
        return (-1);
    }

    OPAL_OUTPUT((ompi_coll_tuned_stream,"alg_id %3d\t", alg_p->alg_rule_id));

    if (0 == alg_p->n_com_sizes) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"no coms defined\n"));
        return (0);
    }

    OPAL_OUTPUT((ompi_coll_tuned_stream,"number of com sizes %3d\n", alg_p->n_com_sizes));

    k = 0;
    while (k < alg_p->n_com_sizes) {
        ompi_coll_tuned_dump_com_rule (alg_p->com_rules + k);
        k++;
    }

    return (0);
}
/*
 * Write the first n_rules algorithm rules of the table alg_p to the
 * tuned output stream.
 *
 * Returns 0, or -1 when alg_p is NULL.
 */
int ompi_coll_tuned_dump_all_rules (ompi_coll_alg_rule_t* alg_p, int n_rules)
{
    int k;

    if (NULL == alg_p) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"Algorithm rule was a NULL ptr?!\n"));
        return (-1);
    }

    OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of algorithm rules %3d\n", n_rules));

    k = 0;
    while (k < n_rules) {
        ompi_coll_tuned_dump_alg_rule (alg_p + k);
        k++;
    }

    return (0);
}
@ -198,82 +198,81 @@ int ompi_coll_tuned_dump_all_rules (ompi_coll_alg_rule_t* alg_p, int n_rules)
/*
 * Release the message-rule array owned by one communicator-size rule
 * and reset its pointer to NULL.  The rule itself is not freed.
 *
 * Returns 0 when there was nothing to free or the array was released,
 * and -1 when com_p is NULL or when the rule reports a non-zero message
 * count but holds no array.
 */
int ompi_coll_tuned_free_msg_rules_in_com_rule (ompi_coll_com_rule_t* com_p)
{
    if (NULL == com_p) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL com_rule ptr\n"));
        return (-1);
    }

    /* no message sizes recorded: nothing to release */
    if (0 == com_p->n_msg_sizes) {
        return (0);
    }

    /* a count without an array is reported as an error */
    if (NULL == com_p->msg_rules) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL msg_rules when msg count was %d\n", com_p->n_msg_sizes));
        return (-1);
    }

    free (com_p->msg_rules);
    com_p->msg_rules = (ompi_coll_msg_rule_t*) NULL;

    return (0);
}
/*
 * Release everything owned by one algorithm rule: the message rules of
 * each communicator-size rule first, then the communicator-size rule
 * array itself, whose pointer is reset to NULL.  The algorithm rule is
 * not freed.
 *
 * Returns 0 on success (including when there is nothing to free) and -1
 * when alg_p is NULL, when the rule reports a non-zero communicator
 * count but holds no array, or when freeing any message-rule array
 * reported an error.
 */
int ompi_coll_tuned_free_coms_in_alg_rule (ompi_coll_alg_rule_t* alg_p)
{
    int rc = 0;
    int i;

    if (!alg_p) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL alg_rule ptr\n"));
        return (-1);
    }

    /* no communicator sizes recorded: nothing to release */
    if (!alg_p->n_com_sizes) {
        return (0);
    }

    /* a count without an array is an error, as it is for message rules */
    if (!alg_p->com_rules) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL com_rules when com count was %d\n", alg_p->n_com_sizes));
        return (-1);
    }

    /* free the message rules of every com rule, remembering any failure */
    for (i = 0; i < alg_p->n_com_sizes; i++) {
        if (0 != ompi_coll_tuned_free_msg_rules_in_com_rule (&(alg_p->com_rules[i]))) {
            rc = -1;
        }
    }

    /* the com rules themselves can now be released */
    free (alg_p->com_rules);
    alg_p->com_rules = (ompi_coll_com_rule_t*) NULL;

    return (rc);
}
/*
 * Release a whole rule table: the contents of each of the first n_algs
 * algorithm rules, then the table alg_p itself.
 *
 * Returns the sum of the per-rule results (0 when every rule was freed
 * cleanly, negative otherwise).  A NULL table is never indexed: it
 * yields -1 when n_algs is positive and 0 otherwise.
 */
int ompi_coll_tuned_free_all_rules (ompi_coll_alg_rule_t* alg_p, int n_algs)
{
    int i;
    int rc = 0;

    if (!alg_p) {
        if (n_algs > 0) {
            OPAL_OUTPUT((ompi_coll_tuned_stream,"attempt to free NULL alg_rule ptr\n"));
            return (-1);
        }
        return (0);
    }

    for (i = 0; i < n_algs; i++) {
        rc += ompi_coll_tuned_free_coms_in_alg_rule (&(alg_p[i]));
    }

    free (alg_p);

    return (rc);
}
@ -296,48 +295,48 @@ int ompi_coll_tuned_free_all_rules (ompi_coll_alg_rule_t* alg_p, int n_algs)
*/ */
/*
 * Pick the communicator-size rule to use for a communicator of
 * mpi_comsize processes under algorithm rule rules[alg_id].
 *
 * The rules are scanned in array order.  The last rule seen whose
 * mpi_comsize is <= the requested size is kept, and the scan stops at
 * the first rule that is larger.  If even the first rule is larger, the
 * first rule is returned.
 * NOTE(review): the early stop only yields the closest match when the
 * rules are stored in ascending size order - confirm that the loader
 * guarantees this.
 *
 * alg_id is used as an index without a bounds check; the caller must
 * pass a valid index.
 *
 * Returns the selected rule (also logged and dumped to the tuned output
 * stream), or NULL when rules is NULL, when the algorithm rule has no
 * communicator sizes, or when it reports sizes but holds no rule array.
 */
ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* rules, int alg_id, int mpi_comsize)
{
    ompi_coll_alg_rule_t* alg_p;
    ompi_coll_com_rule_t* com_p;
    ompi_coll_com_rule_t* best_com_p;
    int i;

    if (!rules) {                     /* no rule base, no resulting com rule */
        return ((ompi_coll_com_rule_t*)NULL);
    }

    alg_p = &(rules[alg_id]);         /* the algorithm rule to search */

    if (!alg_p->n_com_sizes) {        /* no com sizes so no rule */
        return ((ompi_coll_com_rule_t*)NULL);
    }

    if (!alg_p->com_rules) {          /* count without an array: nothing to search */
        return ((ompi_coll_com_rule_t*)NULL);
    }

    /* start from the first com rule and advance while rules still fit */
    best_com_p = alg_p->com_rules;
    for (i = 0, com_p = alg_p->com_rules; i < alg_p->n_com_sizes; i++, com_p++) {
        if (com_p->mpi_comsize > mpi_comsize) {
            break;
        }
        best_com_p = com_p;
    }

    OPAL_OUTPUT((ompi_coll_tuned_stream,"Selected the following com rule id %d\n", best_com_p->com_rule_id));
    ompi_coll_tuned_dump_com_rule (best_com_p);

    return (best_com_p);
}
/* /*
@ -356,61 +355,61 @@ ompi_coll_com_rule_t* ompi_coll_tuned_get_com_rule_ptr (ompi_coll_alg_rule_t* ru
*/ */
/*
 * Look up the algorithm and its parameters for a message of mpi_msgsize
 * within the communicator-size rule base_com_rule.
 *
 * The message rules are scanned in array order.  The last rule seen
 * whose msg_size is <= mpi_msgsize is kept, and the scan stops at the
 * first rule that is larger.  If even the first rule is larger, the
 * first rule is used.
 * NOTE(review): the early stop only yields the closest match when the
 * rules are stored in ascending msg_size order - confirm that the
 * loader guarantees this.
 *
 * On success *result_topo_faninout and *result_segsize receive the
 * selected rule's fan in/out and segment size, and the rule's
 * result_alg is returned.  The selected rule is also logged and dumped
 * to the tuned output stream.
 *
 * Returns 0, leaving both outputs untouched, when any pointer argument
 * is NULL, when the com rule has no message sizes, or when it reports
 * message sizes but holds no rule array.
 */
int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rule, int mpi_msgsize, int *result_topo_faninout,
                                              int* result_segsize)
{
    ompi_coll_msg_rule_t* msg_p;
    ompi_coll_msg_rule_t* best_msg_p;
    int i;

    if (!base_com_rule || !result_topo_faninout || !result_segsize) {
        return (0);
    }

    if (!base_com_rule->n_msg_sizes) {   /* no msg sizes so no rule */
        return (0);
    }

    if (!base_com_rule->msg_rules) {     /* count without an array: nothing to search */
        return (0);
    }

    /* start from the first msg rule and advance while rules still fit */
    best_msg_p = base_com_rule->msg_rules;
    for (i = 0, msg_p = base_com_rule->msg_rules; i < base_com_rule->n_msg_sizes; i++, msg_p++) {
        if (msg_p->msg_size > mpi_msgsize) {
            break;
        }
        best_msg_p = msg_p;
    }

    OPAL_OUTPUT((ompi_coll_tuned_stream,"Selected the following msg rule id %d\n", best_msg_p->msg_rule_id));
    ompi_coll_tuned_dump_msg_rule (best_msg_p);

    /* return the topology fan in/out */
    *result_topo_faninout = best_msg_p->result_topo_faninout;

    /* return the segment size */
    *result_segsize = best_msg_p->result_segsize;

    /* return the algorithm/method to use */
    return (best_msg_p->result_alg);
}

Просмотреть файл

@ -42,24 +42,24 @@
/*
 * Re-read the forced-algorithm MCA parameters; called on module create
 * (i.e. for each new communicator).
 *
 * Looks up the algorithm, segment size, tree fanout and chain fanout
 * parameters by the indices in mca_params and stores them in
 * *forced_values.  The results of the individual lookups are not
 * checked; MPI_SUCCESS is always returned.
 */
int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
                                      coll_tuned_force_algorithm_params_t *forced_values)
{
    mca_base_param_lookup_int (mca_params.algorithm_param_index,    &forced_values->algorithm);
    mca_base_param_lookup_int (mca_params.segsize_param_index,      &forced_values->segsize);
    mca_base_param_lookup_int (mca_params.tree_fanout_param_index,  &forced_values->tree_fanout);
    mca_base_param_lookup_int (mca_params.chain_fanout_param_index, &forced_values->chain_fanout);

    return MPI_SUCCESS;
}
/*
 * Barrier-only variant of the forced-value lookup: barrier has a single
 * option available (at the moment), so only the algorithm parameter is
 * read into *forced_values.  The lookup result is not checked;
 * MPI_SUCCESS is always returned.
 */
int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
                                              coll_tuned_force_algorithm_params_t *forced_values)
{
    mca_base_param_lookup_int (mca_params.algorithm_param_index, &forced_values->algorithm);

    return MPI_SUCCESS;
}

Просмотреть файл

@ -61,8 +61,6 @@ int ompi_coll_tuned_forced_getvalues (coll_tuned_force_algorithm_mca_param_indic
int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params, int ompi_coll_tuned_forced_getvalues_barrier (coll_tuned_force_algorithm_mca_param_indices_t mca_params,
coll_tuned_force_algorithm_params_t *forced_values); coll_tuned_force_algorithm_params_t *forced_values);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }
#endif #endif

Просмотреть файл

@ -45,87 +45,87 @@ static const mca_coll_base_module_1_0_0_t *to_use = NULL;
*/ */
static const mca_coll_base_module_1_0_0_t intra_fixed = { static const mca_coll_base_module_1_0_0_t intra_fixed = {
/* Initialization / finalization functions */ /* Initialization / finalization functions */
ompi_coll_tuned_module_init, ompi_coll_tuned_module_init,
ompi_coll_tuned_module_finalize, ompi_coll_tuned_module_finalize,
/* Collective function pointers */ /* Collective function pointers */
/* ompi_coll_tuned_allgather_intra_dec_fixed, */ /* ompi_coll_tuned_allgather_intra_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_allgatherv_intra_dec_fixed, */ /* ompi_coll_tuned_allgatherv_intra_dec_fixed, */
NULL, NULL,
ompi_coll_tuned_allreduce_intra_dec_fixed, ompi_coll_tuned_allreduce_intra_dec_fixed,
/* NULL, */ /* NULL, */
ompi_coll_tuned_alltoall_intra_dec_fixed, ompi_coll_tuned_alltoall_intra_dec_fixed,
/* NULL, */ /* NULL, */
/* ompi_coll_tuned_alltoallv_intra_dec_fixed, */ /* ompi_coll_tuned_alltoallv_intra_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_alltoallw_intra_dec_fixed, */ /* ompi_coll_tuned_alltoallw_intra_dec_fixed, */
NULL, NULL,
ompi_coll_tuned_barrier_intra_dec_fixed, ompi_coll_tuned_barrier_intra_dec_fixed,
/* NULL, */ /* NULL, */
ompi_coll_tuned_bcast_intra_dec_fixed, ompi_coll_tuned_bcast_intra_dec_fixed,
/* NULL, */ /* NULL, */
/* ompi_coll_tuned_exscan_intra_dec_fixed, */ /* ompi_coll_tuned_exscan_intra_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_gather_intra_dec_fixed, */ /* ompi_coll_tuned_gather_intra_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_gatherv_intra_dec_fixed, */ /* ompi_coll_tuned_gatherv_intra_dec_fixed, */
NULL, NULL,
ompi_coll_tuned_reduce_intra_dec_fixed, ompi_coll_tuned_reduce_intra_dec_fixed,
/* NULL, */ /* NULL, */
/* ompi_coll_tuned_reduce_scatter_intra_dec_fixed, */ /* ompi_coll_tuned_reduce_scatter_intra_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_scan_intra_dec_fixed, */ /* ompi_coll_tuned_scan_intra_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_scatter_intra_dec_fixed, */ /* ompi_coll_tuned_scatter_intra_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_scatterv_intra_dec_fixed */ /* ompi_coll_tuned_scatterv_intra_dec_fixed */
NULL NULL
}; };
static const mca_coll_base_module_1_0_0_t intra_dynamic = { static const mca_coll_base_module_1_0_0_t intra_dynamic = {
/* Initialization / finalization functions */ /* Initialization / finalization functions */
ompi_coll_tuned_module_init, ompi_coll_tuned_module_init,
ompi_coll_tuned_module_finalize, ompi_coll_tuned_module_finalize,
/* Collective function pointers */ /* Collective function pointers */
/* ompi_coll_tuned_allgather_intra_dec_dynamic, */ /* ompi_coll_tuned_allgather_intra_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_allgatherv_intra_dec_dynamic, */ /* ompi_coll_tuned_allgatherv_intra_dec_dynamic, */
NULL, NULL,
ompi_coll_tuned_allreduce_intra_dec_dynamic, ompi_coll_tuned_allreduce_intra_dec_dynamic,
/* NULL, */ /* NULL, */
ompi_coll_tuned_alltoall_intra_dec_dynamic, ompi_coll_tuned_alltoall_intra_dec_dynamic,
/* NULL, */ /* NULL, */
/* ompi_coll_tuned_alltoallv_intra_dec_dynamic, */ /* ompi_coll_tuned_alltoallv_intra_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_alltoallw_intra_dec_dynamic, */ /* ompi_coll_tuned_alltoallw_intra_dec_dynamic, */
NULL, NULL,
ompi_coll_tuned_barrier_intra_dec_dynamic, ompi_coll_tuned_barrier_intra_dec_dynamic,
/* NULL, */ /* NULL, */
ompi_coll_tuned_bcast_intra_dec_dynamic, ompi_coll_tuned_bcast_intra_dec_dynamic,
/* NULL, */ /* NULL, */
/* ompi_coll_tuned_exscan_intra_dec_dynamic, */ /* ompi_coll_tuned_exscan_intra_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_gather_intra_dec_dynamic, */ /* ompi_coll_tuned_gather_intra_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_gatherv_intra_dec_dynamic, */ /* ompi_coll_tuned_gatherv_intra_dec_dynamic, */
NULL, NULL,
ompi_coll_tuned_reduce_intra_dec_dynamic, ompi_coll_tuned_reduce_intra_dec_dynamic,
/* NULL, */ /* NULL, */
/* ompi_coll_tuned_reduce_scatter_intra_dec_dynamic, */ /* ompi_coll_tuned_reduce_scatter_intra_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_scan_intra_dec_dynamic, */ /* ompi_coll_tuned_scan_intra_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_scatter_intra_dec_dynamic, */ /* ompi_coll_tuned_scatter_intra_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_scatterv_intra_dec_dynamic */ /* ompi_coll_tuned_scatterv_intra_dec_dynamic */
NULL NULL
}; };
@ -137,87 +137,87 @@ static const mca_coll_base_module_1_0_0_t intra_dynamic = {
*/ */
static const mca_coll_base_module_1_0_0_t inter_fixed = { static const mca_coll_base_module_1_0_0_t inter_fixed = {
/* Initialization / finalization functions */ /* Initialization / finalization functions */
ompi_coll_tuned_module_init, ompi_coll_tuned_module_init,
ompi_coll_tuned_module_finalize, ompi_coll_tuned_module_finalize,
/* Collective function pointers */ /* Collective function pointers */
/* ompi_coll_tuned_allgather_inter_dec_fixed, */ /* ompi_coll_tuned_allgather_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_allgatherv_inter_dec_fixed, */ /* ompi_coll_tuned_allgatherv_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_allreduce_inter_dec_fixed, */ /* ompi_coll_tuned_allreduce_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_alltoall_inter_dec_fixed, */ /* ompi_coll_tuned_alltoall_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_alltoallv_inter_dec_fixed, */ /* ompi_coll_tuned_alltoallv_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_alltoallw_inter_dec_fixed, */ /* ompi_coll_tuned_alltoallw_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_barrier_inter_dec_fixed, */ /* ompi_coll_tuned_barrier_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_bcast_inter_dec_fixed, */ /* ompi_coll_tuned_bcast_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_exscan_inter_dec_fixed, */ /* ompi_coll_tuned_exscan_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_gather_inter_dec_fixed, */ /* ompi_coll_tuned_gather_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_gatherv_inter_dec_fixed, */ /* ompi_coll_tuned_gatherv_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_reduce_inter_dec_fixed, */ /* ompi_coll_tuned_reduce_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_reduce_scatter_inter_dec_fixed, */ /* ompi_coll_tuned_reduce_scatter_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_scan_inter_dec_fixed, */ /* ompi_coll_tuned_scan_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_scatter_inter_dec_fixed, */ /* ompi_coll_tuned_scatter_inter_dec_fixed, */
NULL, NULL,
/* ompi_coll_tuned_scatterv_inter_dec_fixed */ /* ompi_coll_tuned_scatterv_inter_dec_fixed */
NULL NULL
}; };
static const mca_coll_base_module_1_0_0_t inter_dynamic = { static const mca_coll_base_module_1_0_0_t inter_dynamic = {
/* Initialization / finalization functions */ /* Initialization / finalization functions */
ompi_coll_tuned_module_init, ompi_coll_tuned_module_init,
ompi_coll_tuned_module_finalize, ompi_coll_tuned_module_finalize,
/* Collective function pointers */ /* Collective function pointers */
/* ompi_coll_tuned_allgather_inter_dec_dynamic, */ /* ompi_coll_tuned_allgather_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_allgatherv_inter_dec_dynamic, */ /* ompi_coll_tuned_allgatherv_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_allreduce_inter_dec_dynamic, */ /* ompi_coll_tuned_allreduce_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_alltoall_inter_dec_dynamic, */ /* ompi_coll_tuned_alltoall_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_alltoallv_inter_dec_dynamic, */ /* ompi_coll_tuned_alltoallv_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_alltoallw_inter_dec_dynamic, */ /* ompi_coll_tuned_alltoallw_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_barrier_inter_dec_dynamic, */ /* ompi_coll_tuned_barrier_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_bcast_inter_dec_dynamic, */ /* ompi_coll_tuned_bcast_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_exscan_inter_dec_dynamic, */ /* ompi_coll_tuned_exscan_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_gather_inter_dec_dynamic, */ /* ompi_coll_tuned_gather_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_gatherv_inter_dec_dynamic, */ /* ompi_coll_tuned_gatherv_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_reduce_inter_dec_dynamic, */ /* ompi_coll_tuned_reduce_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_reduce_scatter_inter_dec_dynamic, */ /* ompi_coll_tuned_reduce_scatter_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_scan_inter_dec_dynamic, */ /* ompi_coll_tuned_scan_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_scatter_inter_dec_dynamic, */ /* ompi_coll_tuned_scatter_inter_dec_dynamic, */
NULL, NULL,
/* ompi_coll_tuned_scatterv_inter_dec_dynamic */ /* ompi_coll_tuned_scatterv_inter_dec_dynamic */
NULL NULL
}; };
@ -233,7 +233,7 @@ static const mca_coll_base_module_1_0_0_t inter_dynamic = {
* required level of thread support. * required level of thread support.
*/ */
int ompi_coll_tuned_init_query(bool enable_progress_threads, int ompi_coll_tuned_init_query(bool enable_progress_threads,
bool enable_mpi_threads) bool enable_mpi_threads)
{ {
/* Nothing to do */ /* Nothing to do */
@ -248,38 +248,38 @@ int ompi_coll_tuned_init_query(bool enable_progress_threads,
*/ */
const mca_coll_base_module_1_0_0_t * const mca_coll_base_module_1_0_0_t *
ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority, ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
struct mca_coll_base_comm_t **data) struct mca_coll_base_comm_t **data)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called")); OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called"));
*priority = ompi_coll_tuned_priority; *priority = ompi_coll_tuned_priority;
/* /*
* Choose whether to use [intra|inter] decision functions * Choose whether to use [intra|inter] decision functions
* and if using fixed OR dynamic rule sets. * and if using fixed OR dynamic rule sets.
* Right now you cannot mix them, maybe later on it can be changed * Right now you cannot mix them, maybe later on it can be changed
* but this would probably add an extra if and funct call to the path * but this would probably add an extra if and funct call to the path
* *
*/ */
if (OMPI_COMM_IS_INTER(comm)) { if (OMPI_COMM_IS_INTER(comm)) {
if (ompi_coll_tuned_use_dynamic_rules) { if (ompi_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic")); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic"));
to_use = &inter_dynamic; to_use = &inter_dynamic;
} else { } else {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_fixed")); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_fixed"));
to_use = &inter_fixed; to_use = &inter_fixed;
}
} else { /* is an intra comm */
if (ompi_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic"));
to_use = &intra_dynamic;
} else {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed"));
to_use = &intra_fixed;
}
} }
} else { /* is an intra comm */ return to_use;
if (ompi_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic"));
to_use = &intra_dynamic;
} else {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed"));
to_use = &intra_fixed;
}
}
return to_use;
} }
@ -289,199 +289,199 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
const struct mca_coll_base_module_1_0_0_t * const struct mca_coll_base_module_1_0_0_t *
ompi_coll_tuned_module_init(struct ompi_communicator_t *comm) ompi_coll_tuned_module_init(struct ompi_communicator_t *comm)
{ {
int size, rank; int size, rank;
struct mca_coll_base_comm_t *data; struct mca_coll_base_comm_t *data;
/* fanout parameters */ /* fanout parameters */
int rc=0; int rc=0;
int i; int i;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called.")); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called."));
/* This routine will become more complex and might have to be */ /* This routine will become more complex and might have to be */
/* broken into more sections/function calls */ /* broken into more sections/function calls */
/* Order of operations: /* Order of operations:
* alloc memory for nb reqs (in case we fall through) * alloc memory for nb reqs (in case we fall through)
* add decision rules if using dynamic rules * add decision rules if using dynamic rules
* compact rules using communicator size info etc * compact rules using communicator size info etc
* build first guess cached topologies (might depend on the rules from above) * build first guess cached topologies (might depend on the rules from above)
* *
* then attach all to the communicator and return base module funct ptrs * then attach all to the communicator and return base module funct ptrs
*/ */
/* Allocate the data that hangs off the communicator */ /* Allocate the data that hangs off the communicator */
if (OMPI_COMM_IS_INTER(comm)) { if (OMPI_COMM_IS_INTER(comm)) {
size = ompi_comm_remote_size(comm); size = ompi_comm_remote_size(comm);
} else { } else {
size = ompi_comm_size(comm); size = ompi_comm_size(comm);
}
/*
* we still malloc data as it is used by the TUNED modules
* if we don't allocate it and fall back to a BASIC module routine then confuses debuggers
* we place any special info after the default data
*
* BUT on very large systems we might not be able to allocate all this memory so
* we do check a MCA parameter to see if if we should allocate this memory
*
* The default is set very high
*
*/
/* if we within the memory/size limit, allow preallocated data */
if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) {
data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t) +
(sizeof(ompi_request_t *) * size * 2));
if (NULL == data) {
return NULL;
} }
data->mcct_reqs = (ompi_request_t **) (data + 1);
data->mcct_num_reqs = size * 2;
}
else {
data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t));
if (NULL == data) {
return NULL;
}
data->mcct_reqs = (ompi_request_t **) NULL;
data->mcct_num_reqs = 0;
}
/* /*
* If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file.. * we still malloc data as it is used by the TUNED modules
* then this effects how much storage space you need * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers
* (This is a basic version of what will go into V2) * we place any special info after the default data
* *
*/ * BUT on very large systems we might not be able to allocate all this memory so
* we do check a MCA parameter to see if if we should allocate this memory
*
* The default is set very high
*
*/
/* if we within the memory/size limit, allow preallocated data */
size = ompi_comm_size(comm); /* find size so we can (A) decide if to access the file directly */ if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) {
/* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */ data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t) +
/* which only has rules in it for our com size */ (sizeof(ompi_request_t *) * size * 2));
rank = ompi_comm_rank(comm); /* find rank as only MCW:0 opens any tuned conf files */ if (NULL == data) {
/* actually if they are below a threadhold, they all open it */ return NULL;
/* have to build a collective in here.. but just for MCW.. */ }
/* but we have to make sure we have the same rules everywhere :( */ data->mcct_reqs = (ompi_request_t **) (data + 1);
data->mcct_num_reqs = size * 2;
}
else {
data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t));
/* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */ if (NULL == data) {
if (ompi_coll_tuned_use_dynamic_rules) { return NULL;
}
data->mcct_reqs = (ompi_request_t **) NULL;
data->mcct_num_reqs = 0;
}
/*
* If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file..
* then this effects how much storage space you need
* (This is a basic version of what will go into V2)
*
*/
size = ompi_comm_size(comm); /* find size so we can (A) decide if to access the file directly */
/* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */
/* which only has rules in it for our com size */
rank = ompi_comm_rank(comm); /* find rank as only MCW:0 opens any tuned conf files */
/* actually if they are below a threadhold, they all open it */
/* have to build a collective in here.. but just for MCW.. */
/* but we have to make sure we have the same rules everywhere :( */
/* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */
if (ompi_coll_tuned_use_dynamic_rules) {
/* base rules */ /* base rules */
data->all_base_rules = (ompi_coll_alg_rule_t*) NULL; data->all_base_rules = (ompi_coll_alg_rule_t*) NULL;
/* each collective rule for my com size */ /* each collective rule for my com size */
for (i=0;i<COLLCOUNT;i++) { for (i=0;i<COLLCOUNT;i++) {
data->com_rules[i] = (ompi_coll_com_rule_t*) NULL; data->com_rules[i] = (ompi_coll_com_rule_t*) NULL;
} }
} }
/* next dynamic state, recheck all forced rules as well */ /* next dynamic state, recheck all forced rules as well */
/* warning, we should check to make sure this is really an INTRA comm here... */ /* warning, we should check to make sure this is really an INTRA comm here... */
if (ompi_coll_tuned_use_dynamic_rules) { if (ompi_coll_tuned_use_dynamic_rules) {
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE]));
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALL], &(data->user_forced[ALLTOALL])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALL], &(data->user_forced[ALLTOALL]));
/* ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); */ /* ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); */
ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER], &(data->user_forced[BARRIER])); ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER], &(data->user_forced[BARRIER]));
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[BCAST], &(data->user_forced[BCAST])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[BCAST], &(data->user_forced[BCAST]));
ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCE], &(data->user_forced[REDUCE])); ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[REDUCE], &(data->user_forced[REDUCE]));
} }
if (&ompi_mpi_comm_world==comm) { if (&ompi_mpi_comm_world==comm) {
if (ompi_coll_tuned_use_dynamic_rules) { if (ompi_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic")); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic"));
if (ompi_coll_tuned_dynamic_rules_filename) { if (ompi_coll_tuned_dynamic_rules_filename) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]",
ompi_coll_tuned_dynamic_rules_filename)); ompi_coll_tuned_dynamic_rules_filename));
rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename, rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename,
&(data->all_base_rules), COLLCOUNT); &(data->all_base_rules), COLLCOUNT);
if (rc>=0) { if (rc>=0) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc)); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc));
/* at this point we all have a base set of rules */ /* at this point we all have a base set of rules */
/* now we can get our customized communicator sized rule set, for each collective */ /* now we can get our customized communicator sized rule set, for each collective */
for (i=0;i<COLLCOUNT;i++) { for (i=0;i<COLLCOUNT;i++) {
data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size);
}
}
else { /* failed to read config file, thus make sure its a NULL... */
data->all_base_rules = (ompi_coll_alg_rule_t*) NULL;
} }
}
else { /* failed to read config file, thus make sure its a NULL... */
data->all_base_rules = (ompi_coll_alg_rule_t*) NULL;
}
} /* end if a config filename exists */ } /* end if a config filename exists */
} /* end if dynamic_rules */ } /* end if dynamic_rules */
} /* end if MCW */ } /* end if MCW */
/* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist.. ref them */ /* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist.. ref them */
/* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */ /* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */
if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&& if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&&
((ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules)) { ((ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules)) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic")); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic"));
/* this will, erm fail if MCW doesn't exist which it should! */ /* this will, erm fail if MCW doesn't exist which it should! */
data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules; data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules;
/* at this point we all have a base set of rules if they exist atall */ /* at this point we all have a base set of rules if they exist atall */
/* now we can get our customized communicator sized rule set, for each collective */ /* now we can get our customized communicator sized rule set, for each collective */
for (i=0;i<COLLCOUNT;i++) { for (i=0;i<COLLCOUNT;i++) {
data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size); data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size);
} }
} }
/* /*
* now for the cached topo functions * now for the cached topo functions
* guess the initial topologies to use rank 0 as root * guess the initial topologies to use rank 0 as root
*/ */
/* general n fan out tree */ /* general n fan out tree */
data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout, comm, 0); data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout, comm, 0);
data->cached_ntree_root = 0; data->cached_ntree_root = 0;
data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout; data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout;
/* binary tree */ /* binary tree */
data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0);
data->cached_bintree_root = 0; data->cached_bintree_root = 0;
/* binomial tree */ /* binomial tree */
data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0); data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
data->cached_bmtree_root = 0; data->cached_bmtree_root = 0;
/* /*
* chains (fanout followed by pipelines) * chains (fanout followed by pipelines)
* are more difficuilt as the fan out really really depends on message size [sometimes].. * are more difficuilt as the fan out really really depends on message size [sometimes]..
* as size gets larger fan-out gets smaller [usually] * as size gets larger fan-out gets smaller [usually]
* *
* will probably change how we cache this later, for now a midsize * will probably change how we cache this later, for now a midsize
* GEF * GEF
*/ */
data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout, comm, 0); data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout, comm, 0);
data->cached_chain_root = 0; data->cached_chain_root = 0;
data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout; data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout;
/* standard pipeline */ /* standard pipeline */
data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0); data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
data->cached_pipeline_root = 0; data->cached_pipeline_root = 0;
/* All done */ /* All done */
comm->c_coll_selected_data = data; comm->c_coll_selected_data = data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use")); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
return to_use; return to_use;
} }
@ -490,48 +490,48 @@ ompi_coll_tuned_module_init(struct ompi_communicator_t *comm)
*/ */
int ompi_coll_tuned_module_finalize(struct ompi_communicator_t *comm) int ompi_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
{ {
if (NULL == comm->c_coll_selected_module) { if (NULL == comm->c_coll_selected_module) {
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
#if OMPI_ENABLE_DEBUG #if OMPI_ENABLE_DEBUG
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
the generel c_coll_selected_data */ the generel c_coll_selected_data */
comm->c_coll_selected_data->mcct_reqs = NULL; comm->c_coll_selected_data->mcct_reqs = NULL;
comm->c_coll_selected_data->mcct_num_reqs = 0; comm->c_coll_selected_data->mcct_num_reqs = 0;
#endif #endif
/* free any cached information that has been allocated */ /* free any cached information that has been allocated */
if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */ if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_ntree); ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_ntree);
} }
if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */ if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree); ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
} }
if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */ if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree); ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree);
} }
if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */ if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain); ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain);
} }
if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */ if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_pipeline); ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_pipeline);
} }
/* if any algorithm rules are cached on the communicator, only free them if its MCW */ /* if any algorithm rules are cached on the communicator, only free them if its MCW */
/* as this is the only place they are allocated by reading the decision configure file */ /* as this is the only place they are allocated by reading the decision configure file */
if ((ompi_coll_tuned_use_dynamic_rules)&&(&ompi_mpi_comm_world==comm)) { if ((ompi_coll_tuned_use_dynamic_rules)&&(&ompi_mpi_comm_world==comm)) {
if (comm->c_coll_selected_data->all_base_rules) { if (comm->c_coll_selected_data->all_base_rules) {
ompi_coll_tuned_free_all_rules (comm->c_coll_selected_data->all_base_rules, COLLCOUNT); ompi_coll_tuned_free_all_rules (comm->c_coll_selected_data->all_base_rules, COLLCOUNT);
} }
} }
/* if allocated memory free it */ /* if allocated memory free it */
if (comm->c_coll_selected_data) { if (comm->c_coll_selected_data) {
free(comm->c_coll_selected_data); free(comm->c_coll_selected_data);
comm->c_coll_selected_data = NULL; comm->c_coll_selected_data = NULL;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -37,9 +37,9 @@
*/ */
int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
ompi_datatype_t* datatype, ompi_op_t* op, ompi_datatype_t* datatype, ompi_op_t* op,
int root, ompi_communicator_t* comm, uint32_t segsize, int root, ompi_communicator_t* comm, uint32_t segsize,
int fanout) int fanout)
{ {
int ret, line, rank, size, i = 0; int ret, line, rank, size, i = 0;
int recvcount, sendcount, prevcount, inbi, previnbi; int recvcount, sendcount, prevcount, inbi, previnbi;
@ -96,10 +96,10 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
} }
realsegsize = segcount * ext; realsegsize = segcount * ext;
/* printf("rank %d root %d count %d \t\t segsize %d typesize %d typeext %d realsegsize %d segcount %d num_segments %d\n", */ /* printf("rank %d root %d count %d \t\t segsize %d typesize %d typeext %d realsegsize %d segcount %d num_segments %d\n", */
/* rank, root, count, segsize, typelng, ext, realsegsize, segcount, num_segments); */ /* rank, root, count, segsize, typelng, ext, realsegsize, segcount, num_segments); */
/* ompi_coll_tuned_topo_dump_chain (chain, rank); */ /* ompi_coll_tuned_topo_dump_chain (chain, rank); */
if (sendbuf != MPI_IN_PLACE) { if (sendbuf != MPI_IN_PLACE) {
@ -111,10 +111,10 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
/* handle special case when size == 1 */ /* handle special case when size == 1 */
if (1 == size ) { if (1 == size ) {
if (sendbuf != MPI_IN_PLACE) { if (sendbuf != MPI_IN_PLACE) {
ompi_ddt_copy_content_same_ddt( datatype, count, (char*)recvbuf, (char*)sendbuf ); ompi_ddt_copy_content_same_ddt( datatype, count, (char*)recvbuf, (char*)sendbuf );
} }
return MPI_SUCCESS; return MPI_SUCCESS;
} }
/* handle non-existent recv buffer (i.e. it's NULL.. like basic allreduce uses!) */ /* handle non-existent recv buffer (i.e. it's NULL.. like basic allreduce uses!) */
@ -173,26 +173,26 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
/* check for root might not be needed as it should be checked higher up */ /* check for root might not be needed as it should be checked higher up */
if ((MPI_IN_PLACE==sendbuf)&&(rank==root)) { if ((MPI_IN_PLACE==sendbuf)&&(rank==root)) {
ret = MCA_PML_CALL(irecv(inbuf[inbi], ret = MCA_PML_CALL(irecv(inbuf[inbi],
recvcount,datatype, recvcount,datatype,
chain->chain_next[i], chain->chain_next[i],
MCA_COLL_BASE_TAG_REDUCE, MCA_COLL_BASE_TAG_REDUCE,
comm, &reqs[inbi])); comm, &reqs[inbi]));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} else { } else {
ret = MCA_PML_CALL(irecv(accumbuf+segindex*realsegsize, ret = MCA_PML_CALL(irecv(accumbuf+segindex*realsegsize,
recvcount,datatype, recvcount,datatype,
chain->chain_next[i], chain->chain_next[i],
MCA_COLL_BASE_TAG_REDUCE, MCA_COLL_BASE_TAG_REDUCE,
comm, &reqs[inbi])); comm, &reqs[inbi]));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} }
} /* if first segment */ } /* if first segment */
else { /* perform a irecv into the standard inbuf */ else { /* perform a irecv into the standard inbuf */
ret = MCA_PML_CALL(irecv(inbuf[inbi],recvcount,datatype, ret = MCA_PML_CALL(irecv(inbuf[inbi],recvcount,datatype,
chain->chain_next[i], chain->chain_next[i],
MCA_COLL_BASE_TAG_REDUCE, MCA_COLL_BASE_TAG_REDUCE,
comm, &reqs[inbi])); comm, &reqs[inbi]));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} }
} }
@ -255,11 +255,11 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
} /* end of for each segment */ } /* end of for each segment */
/* clean up */ /* clean up */
/* if (inbuf!=NULL) { */ /* if (inbuf!=NULL) { */
if (inbuf[0] != NULL) free(inbuf[0]); if (inbuf[0] != NULL) free(inbuf[0]);
if (inbuf[1] != NULL) free(inbuf[1]); if (inbuf[1] != NULL) free(inbuf[1]);
if (allocedaccumbuf) free(accumbuf); if (allocedaccumbuf) free(accumbuf);
/* } */ /* } */
} }
/* leaf nodes */ /* leaf nodes */
@ -280,19 +280,19 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
/* error handler */ /* error handler */
error_hndl: error_hndl:
OPAL_OUTPUT (( ompi_coll_tuned_stream, "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret )); OPAL_OUTPUT (( ompi_coll_tuned_stream, "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret ));
/* if( inbuf != NULL ) { */ /* if( inbuf != NULL ) { */
if( inbuf[0] != NULL ) free(inbuf[0]); if( inbuf[0] != NULL ) free(inbuf[0]);
if( inbuf[1] != NULL ) free(inbuf[1]); if( inbuf[1] != NULL ) free(inbuf[1]);
if (allocedaccumbuf) free(accumbuf); if (allocedaccumbuf) free(accumbuf);
/* } */ /* } */
return ret; return ret;
} }
int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf, int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
int count, ompi_datatype_t* datatype, int count, ompi_datatype_t* datatype,
ompi_op_t* op, int root, ompi_op_t* op, int root,
ompi_communicator_t* comm, uint32_t segsize ) ompi_communicator_t* comm, uint32_t segsize )
{ {
int rank; int rank;
@ -301,8 +301,8 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_pipeline rank %d ss %5d", rank, segsize)); OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_pipeline rank %d ss %5d", rank, segsize));
return ompi_coll_tuned_reduce_intra_chain( sendbuf,recvbuf, count, return ompi_coll_tuned_reduce_intra_chain( sendbuf,recvbuf, count,
datatype, op, root, comm, datatype, op, root, comm,
segsize, 1 ); segsize, 1 );
} }
@ -329,9 +329,9 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
*/ */
int int
ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm) int root, struct ompi_communicator_t *comm)
{ {
int i, rank, err, size; int i, rank, err, size;
ptrdiff_t true_lb, true_extent, lb, extent; ptrdiff_t true_lb, true_extent, lb, extent;
@ -356,8 +356,8 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
return err; return err;
} }
/* see discussion in ompi_coll_basic_reduce_lin_intra about extent and true extent */ /* see discussion in ompi_coll_basic_reduce_lin_intra about extent and true extent */
/* for reducing buffer allocation lengths.... */ /* for reducing buffer allocation lengths.... */
ompi_ddt_get_extent(dtype, &lb, &extent); ompi_ddt_get_extent(dtype, &lb, &extent);
ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent); ompi_ddt_get_true_extent(dtype, &true_lb, &true_extent);
@ -449,88 +449,85 @@ int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_m
int rc; int rc;
int max_alg = 3; int max_alg = 3;
ompi_coll_tuned_forced_max_algorithms[REDUCE] = max_alg; ompi_coll_tuned_forced_max_algorithms[REDUCE] = max_alg;
rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version, rc = mca_base_param_reg_int (&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_count", "reduce_algorithm_count",
"Number of reduce algorithms available", "Number of reduce algorithms available",
false, true, max_alg, NULL); false, true, max_alg, NULL);
mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->algorithm_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm", "reduce_algorithm",
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline", "Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline",
false, false, 0, NULL); false, false, 0, NULL);
mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->segsize_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_segmentsize", "reduce_algorithm_segmentsize",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", "Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, 0, NULL); false, false, 0, NULL);
mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->tree_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_tree_fanout", "reduce_algorithm_tree_fanout",
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", "Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false, false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */ ompi_coll_tuned_init_tree_fanout, /* get system wide default */
NULL); NULL);
mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version, mca_param_indices->chain_fanout_param_index = mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_chain_fanout", "reduce_algorithm_chain_fanout",
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", "Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false, false, false,
ompi_coll_tuned_init_chain_fanout, /* get system wide default */ ompi_coll_tuned_init_chain_fanout, /* get system wide default */
NULL); NULL);
return (MPI_SUCCESS);
return (MPI_SUCCESS);
} }
int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm) struct ompi_communicator_t *comm)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d",
comm->c_coll_selected_data->user_forced[REDUCE].algorithm)); comm->c_coll_selected_data->user_forced[REDUCE].algorithm));
switch (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) { switch (comm->c_coll_selected_data->user_forced[REDUCE].algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm); case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm);
case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm); case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm);
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm, case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm,
comm->c_coll_selected_data->user_forced[REDUCE].segsize, comm->c_coll_selected_data->user_forced[REDUCE].segsize,
comm->c_coll_selected_data->user_forced[REDUCE].chain_fanout); comm->c_coll_selected_data->user_forced[REDUCE].chain_fanout);
case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm, case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm,
comm->c_coll_selected_data->user_forced[REDUCE].segsize); comm->c_coll_selected_data->user_forced[REDUCE].segsize);
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
comm->c_coll_selected_data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); comm->c_coll_selected_data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */
} }
int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root, struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm, struct ompi_communicator_t *comm,
int algorithm, int faninout, int segsize) int algorithm, int faninout, int segsize)
{ {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize)); algorithm, faninout, segsize));
switch (algorithm) { switch (algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm); case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, root, comm);
case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm); case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, root, comm);
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm, case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, op, root, comm,
segsize, faninout); segsize, faninout);
case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm, case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, op, root, comm,
segsize); segsize);
default: default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG); return (MPI_ERR_ARG);
} /* switch */ } /* switch */
} }

Просмотреть файл

@ -68,8 +68,8 @@ static int calculate_num_nodes_up_to_level( int fanout, int level )
ompi_coll_tree_t* ompi_coll_tree_t*
ompi_coll_tuned_topo_build_tree( int fanout, ompi_coll_tuned_topo_build_tree( int fanout,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
int root ) int root )
{ {
int rank, size; int rank, size;
int schild, sparent; int schild, sparent;
@ -187,7 +187,7 @@ int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
ompi_coll_tree_t* ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
int root ) int root )
{ {
int childs = 0; int childs = 0;
int rank; int rank;
@ -256,8 +256,8 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
ompi_coll_chain_t* ompi_coll_chain_t*
ompi_coll_tuned_topo_build_chain( int fanout, ompi_coll_tuned_topo_build_chain( int fanout,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
int root ) int root )
{ {
int rank, size; int rank, size;
int srank; /* shifted rank */ int srank; /* shifted rank */
@ -428,23 +428,23 @@ int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain )
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank) int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
{ {
int i; int i;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:topo_dump_tree %1d tree root %d fanout %d BM %1d nextsize %d prev %d", rank, OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:topo_dump_tree %1d tree root %d fanout %d BM %1d nextsize %d prev %d", rank,
tree->tree_root, tree->tree_bmtree, tree->tree_fanout, tree->tree_nextsize, tree->tree_prev)); tree->tree_root, tree->tree_bmtree, tree->tree_fanout, tree->tree_nextsize, tree->tree_prev));
if (tree->tree_nextsize) { if (tree->tree_nextsize) {
for (i=0;i<tree->tree_nextsize;i++) OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i])); for (i=0;i<tree->tree_nextsize;i++) OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i]));
} }
return (0); return (0);
} }
int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank) int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank)
{ {
int i; int i;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:topo_dump_chain %1d chain root %d fanout %d nextsize %d prev %d\n", rank, OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:topo_dump_chain %1d chain root %d fanout %d nextsize %d prev %d\n", rank,
chain->chain_root, chain->chain_numchain, chain->chain_nextsize, chain->chain_prev)); chain->chain_root, chain->chain_numchain, chain->chain_nextsize, chain->chain_prev));
if (chain->chain_nextsize) { if (chain->chain_nextsize) {
for (i=0;i<chain->chain_nextsize;i++) OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d ", i, chain->chain_next[i])); for (i=0;i<chain->chain_nextsize;i++) OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d ", i, chain->chain_next[i]));
} }
return (0); return (0);
} }

Просмотреть файл

@ -65,8 +65,6 @@ int ompi_coll_tuned_topo_destroy_chain( ompi_coll_chain_t** chain );
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank); int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank); int ompi_coll_tuned_topo_dump_chain (ompi_coll_chain_t* chain, int rank);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }
#endif #endif

Просмотреть файл

@ -29,16 +29,16 @@
#include "coll_tuned_util.h" #include "coll_tuned_util.h"
int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount, ompi_datatype_t* sdatatype, int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount, ompi_datatype_t* sdatatype,
int dest, int stag, int dest, int stag,
void* recvbuf, int rcount, ompi_datatype_t* rdatatype, void* recvbuf, int rcount, ompi_datatype_t* rdatatype,
int source, int rtag, int source, int rtag,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
ompi_status_public_t* status ) ompi_status_public_t* status )
{ /* post receive first, then send, then waitall... should be fast (I hope) */ { /* post receive first, then send, then waitall... should be fast (I hope) */
int err, line = 0; int err, line = 0;
ompi_request_t* reqs[2]; ompi_request_t* reqs[2];
ompi_status_public_t statuses[2]; ompi_status_public_t statuses[2];
/* post new irecv */ /* post new irecv */
err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, comm, &reqs[0])); err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, comm, &reqs[0]));
@ -68,14 +68,14 @@ ompi_status_public_t statuses[2];
*/ */
int ompi_coll_tuned_sendrecv_actual_localcompleted ( int ompi_coll_tuned_sendrecv_actual_localcompleted (
void* sendbuf, int scount, ompi_datatype_t* sdatatype, int dest, int stag, void* sendbuf, int scount, ompi_datatype_t* sdatatype, int dest, int stag,
void* recvbuf, int rcount, ompi_datatype_t* rdatatype, int source, int rtag, void* recvbuf, int rcount, ompi_datatype_t* rdatatype, int source, int rtag,
struct ompi_communicator_t* comm, ompi_status_public_t* status ) struct ompi_communicator_t* comm, ompi_status_public_t* status )
{ /* post receive first, then [local] sync send, then wait... should be fast (I hope) */ { /* post receive first, then [local] sync send, then wait... should be fast (I hope) */
int err, line = 0; int err, line = 0;
ompi_request_t* req; ompi_request_t* req;
ompi_status_public_t tmpstatus; ompi_status_public_t tmpstatus;
/* post new irecv */ /* post new irecv */
err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, comm, &req)); err = MCA_PML_CALL(irecv( recvbuf, rcount, rdatatype, source, rtag, comm, &req));
@ -98,3 +98,4 @@ ompi_status_public_t tmpstatus;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err)); OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",__FILE__,line,err));
return (err); return (err);
} }

Просмотреть файл

@ -34,55 +34,51 @@ extern "C" {
/* prototypes */ /* prototypes */
int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount, ompi_datatype_t* sdatatype, int ompi_coll_tuned_sendrecv_actual( void* sendbuf, int scount, ompi_datatype_t* sdatatype,
int dest, int stag, int dest, int stag,
void* recvbuf, int rcount, ompi_datatype_t* rdatatype, void* recvbuf, int rcount, ompi_datatype_t* rdatatype,
int source, int rtag, int source, int rtag,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
ompi_status_public_t* status ); ompi_status_public_t* status );
/* inline functions */ /* inline functions */
static inline int ompi_coll_tuned_sendrecv( void* sendbuf, int scount, ompi_datatype_t* sdatatype, static inline int ompi_coll_tuned_sendrecv( void* sendbuf, int scount, ompi_datatype_t* sdatatype,
int dest, int stag, int dest, int stag,
void* recvbuf, int rcount, ompi_datatype_t* rdatatype, void* recvbuf, int rcount, ompi_datatype_t* rdatatype,
int source, int rtag, int source, int rtag,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
ompi_status_public_t* status, int myid ) ompi_status_public_t* status, int myid )
{ {
if ((dest==myid)&&(source==myid)) { if ((dest==myid)&&(source==myid)) {
return (int) ompi_ddt_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype); return (int) ompi_ddt_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype);
} }
else { return ompi_coll_tuned_sendrecv_actual (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype,
return ompi_coll_tuned_sendrecv_actual (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype, source, rtag, comm, status);
source, rtag, comm, status);
}
} }
int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount, ompi_datatype_t* sdatatype, int ompi_coll_tuned_sendrecv_actual_localcompleted( void* sendbuf, int scount, ompi_datatype_t* sdatatype,
int dest, int stag, int dest, int stag,
void* recvbuf, int rcount, ompi_datatype_t* rdatatype, void* recvbuf, int rcount, ompi_datatype_t* rdatatype,
int source, int rtag, int source, int rtag,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
ompi_status_public_t* status ); ompi_status_public_t* status );
/* inline functions */ /* inline functions */
static inline int ompi_coll_tuned_sendrecv_localcompleted( void* sendbuf, int scount, ompi_datatype_t* sdatatype, static inline int ompi_coll_tuned_sendrecv_localcompleted( void* sendbuf, int scount, ompi_datatype_t* sdatatype,
int dest, int stag, int dest, int stag,
void* recvbuf, int rcount, ompi_datatype_t* rdatatype, void* recvbuf, int rcount, ompi_datatype_t* rdatatype,
int source, int rtag, int source, int rtag,
struct ompi_communicator_t* comm, struct ompi_communicator_t* comm,
ompi_status_public_t* status, int myid ) ompi_status_public_t* status, int myid )
{ {
if ((dest==myid)&&(source==myid)) { if ((dest==myid)&&(source==myid)) {
return (int) ompi_ddt_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype); return (int) ompi_ddt_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype);
} }
else { return ompi_coll_tuned_sendrecv_actual_localcompleted (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype,
return ompi_coll_tuned_sendrecv_actual_localcompleted (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype, source, rtag, comm, status);
source, rtag, comm, status);
}
} }
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)