OSHMEM/COLL: optimization on zero-length ops
- removed barrier call on zero-length operations

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
(cherry picked from commit ff2fd0679eb4b31bfd840395d73746612e2670f4)
Этот коммит содержится в:
родитель
dea9cf6b63
Коммит
0a064d8c8d
@ -61,17 +61,20 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
|
|||||||
return OSHMEM_ERR_BAD_PARAM;
|
return OSHMEM_ERR_BAD_PARAM;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nelems) {
|
/* Do nothing on zero-length request */
|
||||||
if ((sst == 1) && (dst == 1)) {
|
if (OPAL_UNLIKELY(!nelems)) {
|
||||||
rc = a2a_alg_simple(group, target, source, nelems, element_size);
|
return OPAL_SUCCESS;
|
||||||
} else {
|
}
|
||||||
rc = a2as_alg_simple(group, target, source, dst, sst, nelems,
|
|
||||||
element_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rc != OSHMEM_SUCCESS) {
|
if ((sst == 1) && (dst == 1)) {
|
||||||
return rc;
|
rc = a2a_alg_simple(group, target, source, nelems, element_size);
|
||||||
}
|
} else {
|
||||||
|
rc = a2as_alg_simple(group, target, source, dst, sst, nelems,
|
||||||
|
element_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rc != OSHMEM_SUCCESS) {
|
||||||
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* quiet is needed because scoll level barrier does not
|
/* quiet is needed because scoll level barrier does not
|
||||||
|
@ -55,6 +55,11 @@ int mca_scoll_basic_broadcast(struct oshmem_group_t *group,
|
|||||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
|
/* Do nothing on zero-length request */
|
||||||
|
if (OPAL_UNLIKELY(!nlong)) {
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
if (pSync) {
|
if (pSync) {
|
||||||
alg = (alg == SCOLL_DEFAULT_ALG ?
|
alg = (alg == SCOLL_DEFAULT_ALG ?
|
||||||
mca_scoll_basic_param_broadcast_algorithm : alg);
|
mca_scoll_basic_param_broadcast_algorithm : alg);
|
||||||
@ -131,7 +136,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
|
|||||||
group->my_pe, pSync[0], PE_root);
|
group->my_pe, pSync[0], PE_root);
|
||||||
|
|
||||||
/* Check if this PE is the root */
|
/* Check if this PE is the root */
|
||||||
if ((PE_root == group->my_pe) && nlong) {
|
if (PE_root == group->my_pe) {
|
||||||
int pe_cur = 0;
|
int pe_cur = 0;
|
||||||
|
|
||||||
SCOLL_VERBOSE(14,
|
SCOLL_VERBOSE(14,
|
||||||
|
@ -66,7 +66,12 @@ int mca_scoll_basic_collect(struct oshmem_group_t *group,
|
|||||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
if (nlong_type && nlong) {
|
/* Do nothing on zero-length request */
|
||||||
|
if (OPAL_UNLIKELY(!nlong)) {
|
||||||
|
return OPAL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nlong_type) {
|
||||||
alg = (alg == SCOLL_DEFAULT_ALG ?
|
alg = (alg == SCOLL_DEFAULT_ALG ?
|
||||||
mca_scoll_basic_param_collect_algorithm : alg);
|
mca_scoll_basic_param_collect_algorithm : alg);
|
||||||
switch (alg) {
|
switch (alg) {
|
||||||
@ -156,7 +161,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group,
|
|||||||
group->my_pe);
|
group->my_pe);
|
||||||
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
|
||||||
|
|
||||||
if ((PE_root == group->my_pe) && nlong) {
|
if (PE_root == group->my_pe) {
|
||||||
int pe_cur = 0;
|
int pe_cur = 0;
|
||||||
|
|
||||||
memcpy((void*) ((unsigned char*) target + 0 * nlong),
|
memcpy((void*) ((unsigned char*) target + 0 * nlong),
|
||||||
@ -543,7 +548,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group,
|
|||||||
/* Set own data size */
|
/* Set own data size */
|
||||||
pSync[0] = (nlong ? (long)nlong : SHMEM_SYNC_READY);
|
pSync[0] = (nlong ? (long)nlong : SHMEM_SYNC_READY);
|
||||||
|
|
||||||
if ((PE_root == group->my_pe) && nlong) {
|
if (PE_root == group->my_pe) {
|
||||||
long value = 0;
|
long value = 0;
|
||||||
int pe_cur = 0;
|
int pe_cur = 0;
|
||||||
long wait_pe_count = 0;
|
long wait_pe_count = 0;
|
||||||
|
@ -78,10 +78,14 @@ int mca_scoll_basic_reduce(struct oshmem_group_t *group,
|
|||||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
|
/* Do nothing on zero-length request */
|
||||||
|
if (OPAL_UNLIKELY(!nlong)) {
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
if (pSync) {
|
if (pSync) {
|
||||||
alg = (nlong ? (alg == SCOLL_DEFAULT_ALG ?
|
alg = (alg == SCOLL_DEFAULT_ALG ?
|
||||||
mca_scoll_basic_param_reduce_algorithm : alg) :
|
mca_scoll_basic_param_reduce_algorithm : alg);
|
||||||
SCOLL_ALG_REDUCE_CENTRAL_COUNTER );
|
|
||||||
switch (alg) {
|
switch (alg) {
|
||||||
case SCOLL_ALG_REDUCE_CENTRAL_COUNTER:
|
case SCOLL_ALG_REDUCE_CENTRAL_COUNTER:
|
||||||
{
|
{
|
||||||
@ -186,7 +190,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
|
|||||||
|
|
||||||
SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe);
|
SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe);
|
||||||
|
|
||||||
if ((PE_root == group->my_pe) && nlong) {
|
if (PE_root == group->my_pe) {
|
||||||
int pe_cur = 0;
|
int pe_cur = 0;
|
||||||
void *target_cur = NULL;
|
void *target_cur = NULL;
|
||||||
|
|
||||||
|
@ -54,6 +54,12 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group,
|
|||||||
}
|
}
|
||||||
dtype = &ompi_mpi_char.dt;
|
dtype = &ompi_mpi_char.dt;
|
||||||
root = oshmem_proc_group_find_id(group, PE_root);
|
root = oshmem_proc_group_find_id(group, PE_root);
|
||||||
|
|
||||||
|
/* Do nothing on zero-length request */
|
||||||
|
if (OPAL_UNLIKELY(!nlong)) {
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/* Open SHMEM specification has the following constrains (page 85):
|
/* Open SHMEM specification has the following constrains (page 85):
|
||||||
* "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a
|
* "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a
|
||||||
* default integer value". And also fortran signature says "INTEGER".
|
* default integer value". And also fortran signature says "INTEGER".
|
||||||
@ -61,7 +67,7 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group,
|
|||||||
* and considering this contradiction, we cast size_t to int here
|
* and considering this contradiction, we cast size_t to int here
|
||||||
* in case if the value is less than INT_MAX and fallback to previous module otherwise. */
|
* in case if the value is less than INT_MAX and fallback to previous module otherwise. */
|
||||||
#ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS
|
#ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS
|
||||||
if ((INT_MAX < nlong) || !nlong) {
|
if (INT_MAX < nlong) {
|
||||||
MPI_COLL_VERBOSE(20,"RUNNING FALLBACK BCAST");
|
MPI_COLL_VERBOSE(20,"RUNNING FALLBACK BCAST");
|
||||||
PREVIOUS_SCOLL_FN(mpi_module, broadcast, group,
|
PREVIOUS_SCOLL_FN(mpi_module, broadcast, group,
|
||||||
PE_root,
|
PE_root,
|
||||||
@ -104,7 +110,13 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group,
|
|||||||
void *sbuf, *rbuf;
|
void *sbuf, *rbuf;
|
||||||
MPI_COLL_VERBOSE(20,"RUNNING MPI ALLGATHER");
|
MPI_COLL_VERBOSE(20,"RUNNING MPI ALLGATHER");
|
||||||
mpi_module = (mca_scoll_mpi_module_t *) group->g_scoll.scoll_collect_module;
|
mpi_module = (mca_scoll_mpi_module_t *) group->g_scoll.scoll_collect_module;
|
||||||
if ((nlong_type == true) && nlong) {
|
|
||||||
|
/* Do nothing on zero-length request */
|
||||||
|
if (OPAL_UNLIKELY(!nlong)) {
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nlong_type == true) {
|
||||||
sbuf = (void *) source;
|
sbuf = (void *) source;
|
||||||
rbuf = target;
|
rbuf = target;
|
||||||
stype = &ompi_mpi_char.dt;
|
stype = &ompi_mpi_char.dt;
|
||||||
@ -177,6 +189,12 @@ int mca_scoll_mpi_reduce(struct oshmem_group_t *group,
|
|||||||
dtype = shmem_dtype_to_ompi_dtype(op);
|
dtype = shmem_dtype_to_ompi_dtype(op);
|
||||||
h_op = shmem_op_to_ompi_op(op->op);
|
h_op = shmem_op_to_ompi_op(op->op);
|
||||||
count = nlong/op->dt_size;
|
count = nlong/op->dt_size;
|
||||||
|
|
||||||
|
/* Do nothing on zero-length request */
|
||||||
|
if (OPAL_UNLIKELY(!nlong)) {
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/* Open SHMEM specification has the following constrains (page 85):
|
/* Open SHMEM specification has the following constrains (page 85):
|
||||||
* "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a
|
* "If using C/C++, nelems must be of type integer. If you are using Fortran, it must be a
|
||||||
* default integer value". And also fortran signature says "INTEGER".
|
* default integer value". And also fortran signature says "INTEGER".
|
||||||
@ -184,7 +202,7 @@ int mca_scoll_mpi_reduce(struct oshmem_group_t *group,
|
|||||||
* and considering this contradiction, we cast size_t to int here
|
* and considering this contradiction, we cast size_t to int here
|
||||||
* in case if the value is less than INT_MAX and fallback to previous module otherwise. */
|
* in case if the value is less than INT_MAX and fallback to previous module otherwise. */
|
||||||
#ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS
|
#ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS
|
||||||
if ((INT_MAX < count) || !nlong) {
|
if (INT_MAX < count) {
|
||||||
MPI_COLL_VERBOSE(20,"RUNNING FALLBACK REDUCE");
|
MPI_COLL_VERBOSE(20,"RUNNING FALLBACK REDUCE");
|
||||||
PREVIOUS_SCOLL_FN(mpi_module, reduce, group,
|
PREVIOUS_SCOLL_FN(mpi_module, reduce, group,
|
||||||
op,
|
op,
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user