ad4b33336d
Fixes scoll_basic failures with shmem_verifier, caused by recent changes
in handling of zero-size collectives.
- Check for zero-size length only for fixed size collect (shmem_fcollect),
but not for variable-size collect (shmem_collect)
- Add 'nlong_type' parameter to internal broadcast function, to indicate
whether the 'nlong' parameter is valid on non-root PEs, since it's
used by shmem_collect algorithm. Before this change, some components
assumed it's true (scoll_mpi) while others assumed it's false
(scoll_basic).
- In scoll_basic, if nlong_type==false, do not exit if nlong==0, since
this parameter may not be the same on all PEs.
- In scoll_mpi, fallback to scoll_basic if nlong_type==false, since MPI
requires the 'count' argument of MPI_Bcast to be valid on all ranks.
(Picked from master 939162e)
Signed-off-by: Yossi Itigin <yosefe@mellanox.com>
111 строки
3.5 KiB
C
111 строки
3.5 KiB
C
/*
|
|
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#ifndef MCA_SCOLL_BASIC_H
|
|
#define MCA_SCOLL_BASIC_H
|
|
|
|
#include "oshmem_config.h"
|
|
|
|
#include "oshmem/mca/mca.h"
|
|
#include "oshmem/mca/scoll/scoll.h"
|
|
#include "oshmem/util/oshmem_util.h"
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
/*
 * BARRIER_FUNC and BCAST_FUNC may be called from any basic algorithm.
 *
 * In shmem, the broadcast implementation does not require every process to
 * know the message size (only the root has to know it). Other
 * implementations behave differently, so problems may arise if BCAST_FUNC
 * were a callback into another implementation (e.g. fca, hcoll).
 *
 * For that reason the callback (group->g_scoll.scoll_[func]) is replaced
 * with the corresponding basic function.
 */
#define BARRIER_FUNC mca_scoll_basic_barrier
#define BCAST_FUNC mca_scoll_basic_broadcast
|
|
|
|
/* Globally exported variables */
|
|
|
|
OSHMEM_MODULE_DECLSPEC extern mca_scoll_base_component_1_0_0_t
|
|
mca_scoll_basic_component;
|
|
|
|
extern int mca_scoll_basic_priority_param;
|
|
OSHMEM_DECLSPEC extern int mca_scoll_basic_param_barrier_algorithm;
|
|
extern int mca_scoll_basic_param_broadcast_algorithm;
|
|
extern int mca_scoll_basic_param_collect_algorithm;
|
|
extern int mca_scoll_basic_param_reduce_algorithm;
|
|
|
|
/* API functions */
|
|
|
|
int mca_scoll_basic_init(bool enable_progress_threads, bool enable_threads);
|
|
mca_scoll_base_module_t*
|
|
mca_scoll_basic_query(struct oshmem_group_t *group, int *priority);
|
|
|
|
enum {
|
|
SHMEM_SYNC_INIT = _SHMEM_SYNC_VALUE,
|
|
SHMEM_SYNC_WAIT = -2,
|
|
SHMEM_SYNC_RUN = -3,
|
|
SHMEM_SYNC_READY = -4,
|
|
};
|
|
|
|
/*
 * Basic barrier over the given group (also reachable as BARRIER_FUNC).
 * NOTE(review): pSync is presumably the symmetric synchronization work
 * array and alg an algorithm selector -- confirm against the implementation.
 */
int mca_scoll_basic_barrier(struct oshmem_group_t *group, long *pSync, int alg);
|
|
/*
 * Basic broadcast over the given group (also reachable as BCAST_FUNC).
 *
 * nlong_type tells the implementation whether 'nlong' is valid on non-root
 * PEs. When it is false, 'nlong' may differ between PEs, so a zero 'nlong'
 * must not be treated as a reason to exit early.
 *
 * NOTE(review): PE_root is presumably the broadcasting PE, target/source
 * the destination and origin buffers, pSync the synchronization work array
 * and alg an algorithm selector -- confirm against the implementation.
 */
int mca_scoll_basic_broadcast(struct oshmem_group_t *group,
                              int PE_root,
                              void *target,
                              const void *source,
                              size_t nlong,
                              long *pSync,
                              bool nlong_type,
                              int alg);
|
|
/*
 * Basic collect over the given group.
 *
 * NOTE(review): nlong_type presumably distinguishes the fixed-size variant
 * (shmem_fcollect, same 'nlong' on every PE) from the variable-size variant
 * (shmem_collect) -- confirm against the implementation. target/source are
 * presumably the destination and origin buffers, pSync the synchronization
 * work array and alg an algorithm selector.
 */
int mca_scoll_basic_collect(struct oshmem_group_t *group,
                            void *target,
                            const void *source,
                            size_t nlong,
                            long *pSync,
                            bool nlong_type,
                            int alg);
|
|
/*
 * Basic reduction over the given group using the operation described by op.
 *
 * NOTE(review): target/source are presumably the destination and origin
 * buffers, nlong the data length, pSync and pWrk the synchronization and
 * scratch work arrays, and alg an algorithm selector -- confirm against the
 * implementation.
 */
int mca_scoll_basic_reduce(struct oshmem_group_t *group,
                           struct oshmem_op_t *op,
                           void *target,
                           const void *source,
                           size_t nlong,
                           long *pSync,
                           void *pWrk,
                           int alg);
|
|
/*
 * Basic all-to-all exchange over the given group.
 *
 * NOTE(review): dst and sst are presumably the target and source strides
 * (in elements), nelems the element count per PE and element_size the size
 * of one element in bytes; pSync is presumably the synchronization work
 * array and alg an algorithm selector -- confirm against the implementation.
 */
int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
                             void *target,
                             const void *source,
                             ptrdiff_t dst, ptrdiff_t sst,
                             size_t nelems,
                             size_t element_size,
                             long *pSync,
                             int alg);
|
|
|
|
/*
 * Integer base-2 logarithm, rounded down.
 *
 * Returns the zero-based index of the highest set bit of val, i.e.
 * floor(log2(val)) for val >= 1. A val of 0 has no set bit and yields 0,
 * the same result as val == 1.
 */
static inline unsigned int scoll_log2(unsigned long val)
{
    unsigned int exponent = 0;

    /* Drop the lowest bit first; every further non-zero remainder means the
     * highest set bit sits one position higher. */
    for (val >>= 1; val != 0; val >>= 1) {
        exponent++;
    }

    return exponent;
}
|
|
|
|
struct mca_scoll_basic_module_t {
|
|
mca_scoll_base_module_t super;
|
|
};
|
|
typedef struct mca_scoll_basic_module_t mca_scoll_basic_module_t;
|
|
OBJ_CLASS_DECLARATION(mca_scoll_basic_module_t);
|
|
|
|
END_C_DECLS
|
|
|
|
#endif /* MCA_SCOLL_BASIC_H */
|