1
1

OSHMEM: added processing of zero-length collectives

- according to spec 1.4, annex C, shmem collectives should process
  calls where the number of elements is zero regardless of the pointer
  value
- added zero-count processing - it just calls a barrier to
  sync ranks

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
Этот коммит содержится в:
Sergey Oblomov 2018-11-23 12:05:19 +02:00
родитель f05ebe82d3
Коммит 9de128afaf
10 изменённых файлов: 56 добавлений и 35 удалений

Просмотреть файл

@ -61,15 +61,17 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
return OSHMEM_ERR_BAD_PARAM; return OSHMEM_ERR_BAD_PARAM;
} }
if ((sst == 1) && (dst == 1)) { if (nelems) {
rc = a2a_alg_simple(group, target, source, nelems, element_size); if ((sst == 1) && (dst == 1)) {
} else { rc = a2a_alg_simple(group, target, source, nelems, element_size);
rc = a2as_alg_simple(group, target, source, dst, sst, nelems, } else {
element_size); rc = a2as_alg_simple(group, target, source, dst, sst, nelems,
} element_size);
}
if (rc != OSHMEM_SUCCESS) { if (rc != OSHMEM_SUCCESS) {
return rc; return rc;
}
} }
/* quiet is needed because scoll level barrier does not /* quiet is needed because scoll level barrier does not

Просмотреть файл

@ -131,7 +131,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
group->my_pe, pSync[0], PE_root); group->my_pe, pSync[0], PE_root);
/* Check if this PE is the root */ /* Check if this PE is the root */
if (PE_root == group->my_pe) { if ((PE_root == group->my_pe) && nlong) {
int pe_cur = 0; int pe_cur = 0;
SCOLL_VERBOSE(14, SCOLL_VERBOSE(14,
@ -192,6 +192,16 @@ static int _algorithm_binomial_tree(struct oshmem_group_t *group,
"[#%d] pSync[0] = %ld root = #%d", "[#%d] pSync[0] = %ld root = #%d",
group->my_pe, pSync[0], PE_root); group->my_pe, pSync[0], PE_root);
if (OPAL_UNLIKELY(!nlong)) {
SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe);
/* wait until root finishes sending data */
rc = BARRIER_FUNC(group,
(pSync + 1),
SCOLL_DEFAULT_ALG);
return rc;
}
vrank = (my_id + group->proc_count - root_id) % group->proc_count; vrank = (my_id + group->proc_count - root_id) % group->proc_count;
hibit = opal_hibit(vrank, dim); hibit = opal_hibit(vrank, dim);

Просмотреть файл

@ -66,7 +66,7 @@ int mca_scoll_basic_collect(struct oshmem_group_t *group,
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) { if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
int i = 0; int i = 0;
if (nlong_type) { if (nlong_type && nlong) {
alg = (alg == SCOLL_DEFAULT_ALG ? alg = (alg == SCOLL_DEFAULT_ALG ?
mca_scoll_basic_param_collect_algorithm : alg); mca_scoll_basic_param_collect_algorithm : alg);
switch (alg) { switch (alg) {
@ -156,7 +156,7 @@ static int _algorithm_f_central_counter(struct oshmem_group_t *group,
group->my_pe); group->my_pe);
SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]); SCOLL_VERBOSE(15, "[#%d] pSync[0] = %ld", group->my_pe, pSync[0]);
if (PE_root == group->my_pe) { if ((PE_root == group->my_pe) && nlong) {
int pe_cur = 0; int pe_cur = 0;
memcpy((void*) ((unsigned char*) target + 0 * nlong), memcpy((void*) ((unsigned char*) target + 0 * nlong),
@ -543,7 +543,7 @@ static int _algorithm_central_collector(struct oshmem_group_t *group,
/* Set own data size */ /* Set own data size */
pSync[0] = (nlong ? (long)nlong : SHMEM_SYNC_READY); pSync[0] = (nlong ? (long)nlong : SHMEM_SYNC_READY);
if (PE_root == group->my_pe) { if ((PE_root == group->my_pe) && nlong) {
long value = 0; long value = 0;
int pe_cur = 0; int pe_cur = 0;
long wait_pe_count = 0; long wait_pe_count = 0;

Просмотреть файл

@ -79,8 +79,9 @@ int mca_scoll_basic_reduce(struct oshmem_group_t *group,
int i = 0; int i = 0;
if (pSync) { if (pSync) {
alg = (alg == SCOLL_DEFAULT_ALG ? alg = (nlong ? (alg == SCOLL_DEFAULT_ALG ?
mca_scoll_basic_param_reduce_algorithm : alg); mca_scoll_basic_param_reduce_algorithm : alg) :
SCOLL_ALG_REDUCE_CENTRAL_COUNTER );
switch (alg) { switch (alg) {
case SCOLL_ALG_REDUCE_CENTRAL_COUNTER: case SCOLL_ALG_REDUCE_CENTRAL_COUNTER:
{ {
@ -185,7 +186,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe); SCOLL_VERBOSE(12, "[#%d] Reduce algorithm: Central Counter", group->my_pe);
if (PE_root == group->my_pe) { if ((PE_root == group->my_pe) && nlong) {
int pe_cur = 0; int pe_cur = 0;
void *target_cur = NULL; void *target_cur = NULL;

Просмотреть файл

@ -61,7 +61,7 @@ int mca_scoll_mpi_broadcast(struct oshmem_group_t *group,
* and considering this contradiction, we cast size_t to int here * and considering this contradiction, we cast size_t to int here
* in case if the value is less than INT_MAX and fallback to previous module otherwise. */ * in case if the value is less than INT_MAX and fallback to previous module otherwise. */
#ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS #ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS
if (INT_MAX < nlong) { if ((INT_MAX < nlong) || !nlong) {
MPI_COLL_VERBOSE(20,"RUNNING FALLBACK BCAST"); MPI_COLL_VERBOSE(20,"RUNNING FALLBACK BCAST");
PREVIOUS_SCOLL_FN(mpi_module, broadcast, group, PREVIOUS_SCOLL_FN(mpi_module, broadcast, group,
PE_root, PE_root,
@ -104,7 +104,7 @@ int mca_scoll_mpi_collect(struct oshmem_group_t *group,
void *sbuf, *rbuf; void *sbuf, *rbuf;
MPI_COLL_VERBOSE(20,"RUNNING MPI ALLGATHER"); MPI_COLL_VERBOSE(20,"RUNNING MPI ALLGATHER");
mpi_module = (mca_scoll_mpi_module_t *) group->g_scoll.scoll_collect_module; mpi_module = (mca_scoll_mpi_module_t *) group->g_scoll.scoll_collect_module;
if (nlong_type == true) { if ((nlong_type == true) && nlong) {
sbuf = (void *) source; sbuf = (void *) source;
rbuf = target; rbuf = target;
stype = &ompi_mpi_char.dt; stype = &ompi_mpi_char.dt;
@ -184,7 +184,7 @@ int mca_scoll_mpi_reduce(struct oshmem_group_t *group,
* and considering this contradiction, we cast size_t to int here * and considering this contradiction, we cast size_t to int here
* in case if the value is less than INT_MAX and fallback to previous module otherwise. */ * in case if the value is less than INT_MAX and fallback to previous module otherwise. */
#ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS #ifdef INCOMPATIBLE_SHMEM_OMPI_COLL_APIS
if (INT_MAX < count) { if ((INT_MAX < count) || !nlong) {
MPI_COLL_VERBOSE(20,"RUNNING FALLBACK REDUCE"); MPI_COLL_VERBOSE(20,"RUNNING FALLBACK REDUCE");
PREVIOUS_SCOLL_FN(mpi_module, reduce, group, PREVIOUS_SCOLL_FN(mpi_module, reduce, group,
op, op,

Просмотреть файл

@ -200,6 +200,13 @@ OSHMEM_DECLSPEC int oshmem_shmem_register_params(void);
RUNTIME_CHECK_ERROR("Required address %p is not in symmetric space\n", ((void*)x)); \ RUNTIME_CHECK_ERROR("Required address %p is not in symmetric space\n", ((void*)x)); \
oshmem_shmem_abort(-1); \ oshmem_shmem_abort(-1); \
} }
/*
 * Check that address x is in symmetric space, unless size s is zero.
 * When s is nonzero and x is not a symmetric address, the error is reported
 * and oshmem_shmem_abort(-1) is called. When s is zero the address is not
 * examined at all, so any pointer value is accepted.
 * NOTE(review): this expands to a bare `if` block (no do/while(0) wrapper),
 * so be careful when using it as the body of an unbraced if/else.
 */
#define RUNTIME_CHECK_ADDR_SIZE(x,s) \
if (OPAL_UNLIKELY((s) && !MCA_MEMHEAP_CALL(is_symmetric_addr((x))))) \
{ \
RUNTIME_CHECK_ERROR("Required address %p is not in symmetric space\n", ((void*)x)); \
oshmem_shmem_abort(-1); \
}
#define RUNTIME_CHECK_WITH_MEMHEAP_SIZE(x) \ #define RUNTIME_CHECK_WITH_MEMHEAP_SIZE(x) \
if (OPAL_UNLIKELY((long)(x) > MCA_MEMHEAP_CALL(size))) \ if (OPAL_UNLIKELY((long)(x) > MCA_MEMHEAP_CALL(size))) \
{ \ { \
@ -212,6 +219,7 @@ OSHMEM_DECLSPEC int oshmem_shmem_register_params(void);
#define RUNTIME_CHECK_INIT() #define RUNTIME_CHECK_INIT()
#define RUNTIME_CHECK_PE(x) #define RUNTIME_CHECK_PE(x)
#define RUNTIME_CHECK_ADDR(x) #define RUNTIME_CHECK_ADDR(x)
#define RUNTIME_CHECK_ADDR_SIZE(x,s)
#define RUNTIME_CHECK_WITH_MEMHEAP_SIZE(x) #define RUNTIME_CHECK_WITH_MEMHEAP_SIZE(x)
#endif /* OSHMEM_PARAM_CHECK */ #endif /* OSHMEM_PARAM_CHECK */

Просмотреть файл

@ -30,7 +30,7 @@ static void _shmem_alltoall(void *target,
int PE_size, int PE_size,
long *pSync); long *pSync);
#define SHMEM_TYPE_ALLTOALL(name, element_size) \ #define SHMEM_TYPE_ALLTOALL(name, element_size) \
void shmem##name(void *target, \ void shmem##name(void *target, \
const void *source, \ const void *source, \
size_t nelems, \ size_t nelems, \
@ -40,15 +40,15 @@ static void _shmem_alltoall(void *target,
long *pSync) \ long *pSync) \
{ \ { \
RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_INIT(); \
RUNTIME_CHECK_ADDR(target); \ RUNTIME_CHECK_ADDR_SIZE(target, nelems); \
RUNTIME_CHECK_ADDR(source); \ RUNTIME_CHECK_ADDR_SIZE(source, nelems); \
\ \
_shmem_alltoall(target, source, 1, 1, nelems, element_size, \ _shmem_alltoall(target, source, 1, 1, nelems, element_size, \
PE_start, logPE_stride, PE_size, \ PE_start, logPE_stride, PE_size, \
pSync); \ pSync); \
} }
#define SHMEM_TYPE_ALLTOALLS(name, element_size) \ #define SHMEM_TYPE_ALLTOALLS(name, element_size) \
void shmem##name(void *target, \ void shmem##name(void *target, \
const void *source, \ const void *source, \
ptrdiff_t dst, ptrdiff_t sst, \ ptrdiff_t dst, ptrdiff_t sst, \
@ -59,8 +59,8 @@ static void _shmem_alltoall(void *target,
long *pSync) \ long *pSync) \
{ \ { \
RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_INIT(); \
RUNTIME_CHECK_ADDR(target); \ RUNTIME_CHECK_ADDR_SIZE(target, nelems); \
RUNTIME_CHECK_ADDR(source); \ RUNTIME_CHECK_ADDR_SIZE(source, nelems); \
\ \
_shmem_alltoall(target, source, dst, sst, nelems, element_size, \ _shmem_alltoall(target, source, dst, sst, nelems, element_size, \
PE_start, logPE_stride, PE_size, \ PE_start, logPE_stride, PE_size, \

Просмотреть файл

@ -29,7 +29,7 @@ static void _shmem_broadcast(void *target,
int PE_size, int PE_size,
long *pSync); long *pSync);
#define SHMEM_TYPE_BROADCAST(name, element_size) \ #define SHMEM_TYPE_BROADCAST(name, element_size) \
void shmem##name( void *target, \ void shmem##name( void *target, \
const void *source, \ const void *source, \
size_t nelems, \ size_t nelems, \
@ -40,10 +40,10 @@ static void _shmem_broadcast(void *target,
long *pSync) \ long *pSync) \
{ \ { \
RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_INIT(); \
RUNTIME_CHECK_ADDR(target); \ RUNTIME_CHECK_ADDR_SIZE(target, nelems); \
RUNTIME_CHECK_ADDR(source); \ RUNTIME_CHECK_ADDR_SIZE(source, nelems); \
\ \
_shmem_broadcast( target, source, nelems * element_size, \ _shmem_broadcast( target, source, nelems * element_size, \
PE_root, PE_start, logPE_stride, PE_size, \ PE_root, PE_start, logPE_stride, PE_size, \
pSync); \ pSync); \
} }

Просмотреть файл

@ -39,10 +39,10 @@ static void _shmem_collect(void *target,
long *pSync) \ long *pSync) \
{ \ { \
RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_INIT(); \
RUNTIME_CHECK_ADDR(target); \ RUNTIME_CHECK_ADDR_SIZE(target, nelems); \
RUNTIME_CHECK_ADDR(source); \ RUNTIME_CHECK_ADDR_SIZE(source, nelems); \
\ \
_shmem_collect( target, source, nelems * element_size, \ _shmem_collect( target, source, nelems * element_size, \
PE_start, logPE_stride, PE_size, \ PE_start, logPE_stride, PE_size, \
pSync, \ pSync, \
nelems_type); \ nelems_type); \

Просмотреть файл

@ -26,8 +26,8 @@
* object of every PE in the active set. The active set of PEs is defined by the triple PE_start, * object of every PE in the active set. The active set of PEs is defined by the triple PE_start,
* logPE_stride and PE_size. * logPE_stride and PE_size.
*/ */
#define SHMEM_TYPE_REDUCE_OP(name, type_name, type, prefix) \ #define SHMEM_TYPE_REDUCE_OP(name, type_name, type, prefix) \
void prefix##type_name##_##name##_to_all( type *target, \ void prefix##type_name##_##name##_to_all( type *target, \
const type *source, \ const type *source, \
int nreduce, \ int nreduce, \
int PE_start, \ int PE_start, \
@ -40,8 +40,8 @@
oshmem_group_t* group = NULL; \ oshmem_group_t* group = NULL; \
\ \
RUNTIME_CHECK_INIT(); \ RUNTIME_CHECK_INIT(); \
RUNTIME_CHECK_ADDR(target); \ RUNTIME_CHECK_ADDR_SIZE(target, nreduce); \
RUNTIME_CHECK_ADDR(source); \ RUNTIME_CHECK_ADDR_SIZE(source, nreduce); \
\ \
{ \ { \
group = oshmem_proc_group_create_nofail(PE_start, 1<<logPE_stride, PE_size); \ group = oshmem_proc_group_create_nofail(PE_start, 1<<logPE_stride, PE_size); \