From ff2fd0679eb4b31bfd840395d73746612e2670f4 Mon Sep 17 00:00:00 2001 From: Sergey Oblomov Date: Mon, 26 Nov 2018 10:41:33 +0200 Subject: [PATCH] OSHMEM/COLL: optimization on zero-length ops - removed barrier call on zero-length operations Signed-off-by: Sergey Oblomov --- oshmem/mca/scoll/basic/scoll_basic_alltoall.c | 23 ++++++++++-------- .../mca/scoll/basic/scoll_basic_broadcast.c | 7 +++++- oshmem/mca/scoll/basic/scoll_basic_collect.c | 11 ++++++--- oshmem/mca/scoll/basic/scoll_basic_reduce.c | 12 ++++++---- oshmem/mca/scoll/mpi/scoll_mpi_ops.c | 24 ++++++++++++++++--- 5 files changed, 56 insertions(+), 21 deletions(-) diff --git a/oshmem/mca/scoll/basic/scoll_basic_alltoall.c b/oshmem/mca/scoll/basic/scoll_basic_alltoall.c index 82934655ea..6a87e85578 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_alltoall.c +++ b/oshmem/mca/scoll/basic/scoll_basic_alltoall.c @@ -61,17 +61,20 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group, return OSHMEM_ERR_BAD_PARAM; } - if (nelems) { - if ((sst == 1) && (dst == 1)) { - rc = a2a_alg_simple(group, target, source, nelems, element_size); - } else { - rc = a2as_alg_simple(group, target, source, dst, sst, nelems, - element_size); - } + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nelems)) { + return OPAL_SUCCESS; + } - if (rc != OSHMEM_SUCCESS) { - return rc; - } + if ((sst == 1) && (dst == 1)) { + rc = a2a_alg_simple(group, target, source, nelems, element_size); + } else { + rc = a2as_alg_simple(group, target, source, dst, sst, nelems, + element_size); + } + + if (rc != OSHMEM_SUCCESS) { + return rc; } /* quiet is needed because scoll level barrier does not diff --git a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c index 01dd35f73c..66fc848cdc 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_broadcast.c +++ b/oshmem/mca/scoll/basic/scoll_basic_broadcast.c @@ -55,6 +55,11 @@ int mca_scoll_basic_broadcast(struct oshmem_group_t *group, if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) { int i = 0; + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + if (pSync) { alg = (alg == SCOLL_DEFAULT_ALG ? mca_scoll_basic_param_broadcast_algorithm : alg); @@ -131,7 +136,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, group->my_pe, pSync[0], PE_root); /* Check if this PE is the root */ - if ((PE_root == group->my_pe) && nlong) { + if (PE_root == group->my_pe) { int pe_cur = 0; SCOLL_VERBOSE(14, diff --git a/oshmem/mca/scoll/basic/scoll_basic_collect.c b/oshmem/mca/scoll/basic/scoll_basic_collect.c index b6f5df65f7..e631a31557 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_collect.c +++ b/oshmem/mca/scoll/basic/scoll_basic_collect.c @@ -66,7 +66,12 @@ int mca_scoll_basic_collect(struct oshmem_group_t *group, if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) { int i = 0; - if (nlong_type && nlong) { + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OPAL_SUCCESS; + } + + if (nlong_type) { alg = (alg == SCOLL_DEFAULT_ALG ? mca_scoll_basic_param_collect_algorithm : alg); switch (alg) { @@ -156,7 +161,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group, group->my_pe); SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]); - if ((PE_root == group->my_pe) && nlong) { + if (PE_root == group->my_pe) { int pe_cur = 0; memcpy((void*) ((unsigned char*) target + 0 * nlong), @@ -543,7 +548,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group, /* Set own data size */ pSync[0] = (nlong ? (long)nlong : SHMEM_SYNC_READY); - if ((PE_root == group->my_pe) && nlong) { + if (PE_root == group->my_pe) { long value = 0; int pe_cur = 0; long wait_pe_count = 0; diff --git a/oshmem/mca/scoll/basic/scoll_basic_reduce.c b/oshmem/mca/scoll/basic/scoll_basic_reduce.c index 22c64b47e7..b7f6f12310 100644 --- a/oshmem/mca/scoll/basic/scoll_basic_reduce.c +++ b/oshmem/mca/scoll/basic/scoll_basic_reduce.c @@ -78,10 +78,14 @@ int mca_scoll_basic_reduce(struct oshmem_group_t *group, if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) { int i = 0; + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + if (pSync) { - alg = (nlong ? (alg == SCOLL_DEFAULT_ALG ? - mca_scoll_basic_param_reduce_algorithm : alg) : - SCOLL_ALG_REDUCE_CENTRAL_COUNTER ); + alg = (alg == SCOLL_DEFAULT_ALG ? + mca_scoll_basic_param_reduce_algorithm : alg); switch (alg) { case SCOLL_ALG_REDUCE_CENTRAL_COUNTER: { @@ -186,7 +190,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group, SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe); - if ((PE_root == group->my_pe) && nlong) { + if (PE_root == group->my_pe) { int pe_cur = 0; void *target_cur = NULL; diff --git a/oshmem/mca/scoll/mpi/scoll_mpi_ops.c b/oshmem/mca/scoll/mpi/scoll_mpi_ops.c index df4392acc2..a52a832588 100644 --- a/oshmem/mca/scoll/mpi/scoll_mpi_ops.c +++ b/oshmem/mca/scoll/mpi/scoll_mpi_ops.c @@ -54,6 +54,12 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group, } dtype = &ompi_mpi_char.dt; root = oshmem_proc_group_find_id(group, PE_root); + + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + /* Open SHMEM specification has the following constrains (page 85): * "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a * default integer value". And also fortran signature says "INTEGER". @@ -61,7 +67,7 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group, * and considering this contradiction, we cast size_t to int here * in case if the value is less than INT_MAX and fallback to previous module otherwise. */ #ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS - if ((INT_MAX < nlong) || !nlong) { + if (INT_MAX < nlong) { MPI_COLL_VERBOSE(20,"RUNNING FALLBACK BCAST"); PREVIOUS_SCOLL_FN(mpi_module, broadcast, group, PE_root, @@ -104,7 +110,13 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group, void *sbuf, *rbuf; MPI_COLL_VERBOSE(20,"RUNNING MPI ALLGATHER"); mpi_module = (mca_scoll_mpi_module_t *) group->g_scoll.scoll_collect_module; - if ((nlong_type == true) && nlong) { + + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + + if (nlong_type == true) { sbuf = (void *) source; rbuf = target; stype = &ompi_mpi_char.dt; @@ -177,6 +189,12 @@ int mca_scoll_mpi_reduce(struct oshmem_group_t *group, dtype = shmem_dtype_to_ompi_dtype(op); h_op = shmem_op_to_ompi_op(op->op); count = nlong/op->dt_size; + + /* Do nothing on zero-length request */ + if (OPAL_UNLIKELY(!nlong)) { + return OSHMEM_SUCCESS; + } + /* Open SHMEM specification has the following constrains (page 85): * "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a * default integer value". And also fortran signature says "INTEGER". @@ -184,7 +202,7 @@ int mca_scoll_mpi_reduce(struct oshmem_group_t *group, * and considering this contradiction, we cast size_t to int here * in case if the value is less than INT_MAX and fallback to previous module otherwise. */ #ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS - if ((INT_MAX < count) || !nlong) { + if (INT_MAX < count) { MPI_COLL_VERBOSE(20,"RUNNING FALLBACK REDUCE"); PREVIOUS_SCOLL_FN(mpi_module, reduce, group, op,