Merge pull request #1478 from igor-ivanov/pr/oshmem-v1.3-alltoall
oshmem: Add alltoall
Этот коммит содержится в:
Коммит
1d8fbfefb0
@ -101,6 +101,7 @@ enum shmem_wait_ops {
|
||||
#define _SHMEM_BCAST_SYNC_SIZE (1 + _SHMEM_BARRIER_SYNC_SIZE)
|
||||
#define _SHMEM_COLLECT_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE)
|
||||
#define _SHMEM_REDUCE_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE)
|
||||
#define _SHMEM_ALLTOALL_SYNC_SIZE (1)
|
||||
#define _SHMEM_REDUCE_MIN_WRKDATA_SIZE (1)
|
||||
#define _SHMEM_SYNC_VALUE (-1)
|
||||
|
||||
@ -108,6 +109,7 @@ enum shmem_wait_ops {
|
||||
#define SHMEM_BCAST_SYNC_SIZE _SHMEM_BCAST_SYNC_SIZE
|
||||
#define SHMEM_COLLECT_SYNC_SIZE _SHMEM_COLLECT_SYNC_SIZE
|
||||
#define SHMEM_REDUCE_SYNC_SIZE _SHMEM_REDUCE_SYNC_SIZE
|
||||
#define SHMEM_ALLTOALL_SYNC_SIZE _SHMEM_ALLTOALL_SYNC_SIZE
|
||||
#define SHMEM_REDUCE_MIN_WRKDATA_SIZE _SHMEM_REDUCE_MIN_WRKDATA_SIZE
|
||||
#define SHMEM_SYNC_VALUE _SHMEM_SYNC_VALUE
|
||||
|
||||
@ -344,6 +346,11 @@ OSHMEM_DECLSPEC void shmem_collect32(void *target, const void *source, size_t nl
|
||||
OSHMEM_DECLSPEC void shmem_collect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_fcollect32(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_fcollect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_alltoall32(void *target, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_alltoall64(void *target, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_alltoalls32(void *target, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_alltoalls64(void *target, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
|
||||
|
||||
/*
|
||||
* Reduction routines
|
||||
|
@ -46,6 +46,7 @@ static void scoll_base_module_construct(mca_scoll_base_module_t *m)
|
||||
m->scoll_broadcast = NULL;
|
||||
m->scoll_collect = NULL;
|
||||
m->scoll_reduce = NULL;
|
||||
m->scoll_alltoall = NULL;
|
||||
m->scoll_module_enable = NULL;
|
||||
}
|
||||
|
||||
|
@ -118,6 +118,22 @@ static int scoll_null_reduce(struct oshmem_group_t *group,
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int scoll_null_alltoall(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
int alg)
|
||||
{
|
||||
if (oshmem_proc_group_is_member(group)) {
|
||||
SCOLL_ERROR("internal error");
|
||||
oshmem_shmem_abort(-1);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stuff for the OBJ interface
|
||||
*/
|
||||
@ -160,6 +176,7 @@ int mca_scoll_base_group_unselect(struct oshmem_group_t * group)
|
||||
CLOSE(group, broadcast);
|
||||
CLOSE(group, collect);
|
||||
CLOSE(group, reduce);
|
||||
CLOSE(group, alltoall);
|
||||
|
||||
/* All done */
|
||||
return OSHMEM_SUCCESS;
|
||||
@ -184,6 +201,7 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
group->g_scoll.scoll_broadcast = scoll_null_broadcast;
|
||||
group->g_scoll.scoll_collect = scoll_null_collect;
|
||||
group->g_scoll.scoll_reduce = scoll_null_reduce;
|
||||
group->g_scoll.scoll_alltoall = scoll_null_alltoall;
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
SCOLL_VERBOSE(10,
|
||||
@ -206,10 +224,11 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
if (OSHMEM_SUCCESS != ret) {
|
||||
mca_scoll_base_group_unselect(group);
|
||||
} else {
|
||||
COPY(avail->ac_module, group, barrier);
|
||||
COPY(avail->ac_module, group, broadcast);
|
||||
COPY(avail->ac_module, group, collect);
|
||||
COPY(avail->ac_module, group, reduce);
|
||||
COPY(avail->ac_module, group, barrier);
|
||||
COPY(avail->ac_module, group, alltoall);
|
||||
}
|
||||
OBJ_RELEASE(avail->ac_module);
|
||||
OBJ_RELEASE(avail);
|
||||
@ -220,7 +239,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
if ((NULL == group->g_scoll.scoll_barrier)
|
||||
|| (NULL == group->g_scoll.scoll_broadcast)
|
||||
|| (NULL == group->g_scoll.scoll_collect)
|
||||
|| (NULL == group->g_scoll.scoll_reduce)) {
|
||||
|| (NULL == group->g_scoll.scoll_reduce)
|
||||
|| (NULL == group->g_scoll.scoll_alltoall)) {
|
||||
mca_scoll_base_group_unselect(group);
|
||||
return OSHMEM_ERR_NOT_FOUND;
|
||||
}
|
||||
@ -228,8 +248,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int avail_coll_compare (opal_list_item_t **a,
|
||||
opal_list_item_t **b)
|
||||
static int avail_coll_compare(opal_list_item_t **a,
|
||||
opal_list_item_t **b)
|
||||
{
|
||||
avail_com_t *acom = (avail_com_t *) *a;
|
||||
avail_com_t *bcom = (avail_com_t *) *b;
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
@ -15,7 +15,8 @@ sources = \
|
||||
scoll_basic_barrier.c \
|
||||
scoll_basic_broadcast.c \
|
||||
scoll_basic_collect.c \
|
||||
scoll_basic_reduce.c
|
||||
scoll_basic_reduce.c \
|
||||
scoll_basic_alltoall.c
|
||||
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -77,6 +77,14 @@ int mca_scoll_basic_reduce(struct oshmem_group_t *group,
|
||||
long *pSync,
|
||||
void *pWrk,
|
||||
int alg);
|
||||
int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync,
|
||||
int alg);
|
||||
|
||||
static inline unsigned int scoll_log2(unsigned long val)
|
||||
{
|
||||
|
118
oshmem/mca/scoll/basic/scoll_basic_alltoall.c
Обычный файл
118
oshmem/mca/scoll/basic/scoll_basic_alltoall.c
Обычный файл
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/op/op.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "scoll_basic.h"
|
||||
|
||||
static int _algorithm_simple(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync);
|
||||
|
||||
int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync,
|
||||
int alg)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
|
||||
/* Arguments validation */
|
||||
if (!group) {
|
||||
SCOLL_ERROR("Active set (group) of PE is not defined");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Check if this PE is part of the group */
|
||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||
int i = 0;
|
||||
|
||||
if (pSync) {
|
||||
rc = _algorithm_simple(group,
|
||||
target,
|
||||
source,
|
||||
dst,
|
||||
sst,
|
||||
nelems,
|
||||
element_size,
|
||||
pSync);
|
||||
} else {
|
||||
SCOLL_ERROR("Incorrect argument pSync");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"PE#%d Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_ALLTOALL_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int _algorithm_simple(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t tst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int pe_cur;
|
||||
int i;
|
||||
int j;
|
||||
int k;
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] send data to all PE in the group",
|
||||
group->my_pe);
|
||||
j = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
for (i = 0; i < group->proc_count; i++) {
|
||||
/* index permutation for better distribution of traffic */
|
||||
k = (((j)+(i))%(group->proc_count));
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[k]);
|
||||
rc = MCA_SPML_CALL(put(
|
||||
(void *)((char *)target + j * tst * nelems * element_size),
|
||||
nelems * element_size,
|
||||
(void *)((char *)source + i * sst * nelems * element_size),
|
||||
pe_cur));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Wait for operation completion */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe);
|
||||
rc = BARRIER_FUNC(group,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -52,9 +52,10 @@ mca_scoll_basic_query(struct oshmem_group_t *group, int *priority)
|
||||
module->super.scoll_broadcast = mca_scoll_basic_broadcast;
|
||||
module->super.scoll_collect = mca_scoll_basic_collect;
|
||||
module->super.scoll_reduce = mca_scoll_basic_reduce;
|
||||
module->super.scoll_alltoall = mca_scoll_basic_alltoall;
|
||||
module->super.scoll_module_enable = mca_scoll_basic_enable;
|
||||
return &(module->super);
|
||||
}
|
||||
|
||||
return NULL ;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -93,6 +93,8 @@ struct mca_scoll_fca_module_t {
|
||||
mca_scoll_base_module_t *previous_collect_module;
|
||||
mca_scoll_base_module_reduce_fn_t previous_reduce;
|
||||
mca_scoll_base_module_t *previous_reduce_module;
|
||||
mca_scoll_base_module_alltoall_fn_t previous_alltoall;
|
||||
mca_scoll_base_module_t *previous_alltoall_module;
|
||||
};
|
||||
typedef struct mca_scoll_fca_module_t mca_scoll_fca_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_scoll_fca_module_t);
|
||||
|
@ -391,6 +391,7 @@ static int _save_coll_handlers(mca_scoll_fca_module_t *fca_module)
|
||||
FCA_SAVE_PREV_SCOLL_API(broadcast);
|
||||
FCA_SAVE_PREV_SCOLL_API(collect);
|
||||
FCA_SAVE_PREV_SCOLL_API(reduce);
|
||||
FCA_SAVE_PREV_SCOLL_API(alltoall);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
@ -450,6 +451,7 @@ static void mca_scoll_fca_module_clear(mca_scoll_fca_module_t *fca_module)
|
||||
fca_module->previous_broadcast = NULL;
|
||||
fca_module->previous_collect = NULL;
|
||||
fca_module->previous_reduce = NULL;
|
||||
fca_module->previous_alltoall = NULL;
|
||||
}
|
||||
|
||||
static void mca_scoll_fca_module_construct(mca_scoll_fca_module_t *fca_module)
|
||||
@ -465,6 +467,7 @@ static void mca_scoll_fca_module_destruct(mca_scoll_fca_module_t *fca_module)
|
||||
OBJ_RELEASE(fca_module->previous_broadcast_module);
|
||||
OBJ_RELEASE(fca_module->previous_collect_module);
|
||||
OBJ_RELEASE(fca_module->previous_reduce_module);
|
||||
OBJ_RELEASE(fca_module->previous_alltoall_module);
|
||||
if (fca_module->fca_comm)
|
||||
_destroy_fca_comm(fca_module);
|
||||
free(fca_module->local_ranks);
|
||||
@ -541,6 +544,7 @@ mca_scoll_fca_comm_query(struct oshmem_group_t *comm, int *priority)
|
||||
fca_module->super.scoll_broadcast =
|
||||
mca_scoll_fca_component.fca_enable_bcast ? mca_scoll_fca_broadcast :
|
||||
NULL;
|
||||
fca_module->super.scoll_alltoall = NULL;
|
||||
|
||||
*priority = mca_scoll_fca_component.fca_priority;
|
||||
module = &fca_module->super;
|
||||
|
@ -69,6 +69,8 @@ struct mca_scoll_mpi_module_t {
|
||||
mca_scoll_base_module_t *previous_barrier_module;
|
||||
mca_scoll_base_module_collect_fn_t previous_collect;
|
||||
mca_scoll_base_module_t *previous_collect_module;
|
||||
mca_scoll_base_module_alltoall_fn_t previous_alltoall;
|
||||
mca_scoll_base_module_t *previous_alltoall_module;
|
||||
};
|
||||
typedef struct mca_scoll_mpi_module_t mca_scoll_mpi_module_t;
|
||||
|
||||
|
@ -29,6 +29,7 @@ static void mca_scoll_mpi_module_clear(mca_scoll_mpi_module_t *mpi_module)
|
||||
mpi_module->previous_broadcast = NULL;
|
||||
mpi_module->previous_reduce = NULL;
|
||||
mpi_module->previous_collect = NULL;
|
||||
mpi_module->previous_alltoall = NULL;
|
||||
}
|
||||
|
||||
static void mca_scoll_mpi_module_construct(mca_scoll_mpi_module_t *mpi_module)
|
||||
@ -43,6 +44,7 @@ static void mca_scoll_mpi_module_destruct(mca_scoll_mpi_module_t *mpi_module)
|
||||
OBJ_RELEASE(mpi_module->previous_broadcast_module);
|
||||
OBJ_RELEASE(mpi_module->previous_reduce_module);
|
||||
OBJ_RELEASE(mpi_module->previous_collect_module);
|
||||
OBJ_RELEASE(mpi_module->previous_alltoall_module);
|
||||
|
||||
mca_scoll_mpi_module_clear(mpi_module);
|
||||
/* Free ompi_comm */
|
||||
@ -68,6 +70,7 @@ static int mca_scoll_mpi_save_coll_handlers(mca_scoll_base_module_t *module, osh
|
||||
MPI_SAVE_PREV_SCOLL_API(broadcast);
|
||||
MPI_SAVE_PREV_SCOLL_API(reduce);
|
||||
MPI_SAVE_PREV_SCOLL_API(collect);
|
||||
MPI_SAVE_PREV_SCOLL_API(alltoall);
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
@ -173,6 +176,7 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
|
||||
mpi_module->super.scoll_broadcast = mca_scoll_mpi_broadcast;
|
||||
mpi_module->super.scoll_reduce = mca_scoll_mpi_reduce;
|
||||
mpi_module->super.scoll_collect = mca_scoll_mpi_collect;
|
||||
mpi_module->super.scoll_alltoall = NULL;
|
||||
|
||||
*priority = cm->mpi_priority;
|
||||
module = &mpi_module->super;
|
||||
|
@ -89,7 +89,6 @@ typedef struct mca_scoll_base_component_1_0_0_t mca_scoll_base_component_t;
|
||||
typedef int
|
||||
(*mca_scoll_base_module_enable_1_0_0_fn_t)(struct mca_scoll_base_module_1_0_0_t* module,
|
||||
struct oshmem_group_t *comm);
|
||||
typedef int (*mca_scoll_base_module_ft_event_fn_t)(int state);
|
||||
|
||||
#define SCOLL_DEFAULT_ALG (-1)
|
||||
|
||||
@ -139,6 +138,14 @@ typedef int (*mca_scoll_base_module_reduce_fn_t)(struct oshmem_group_t *group,
|
||||
long *pSync,
|
||||
void *pWrk,
|
||||
int alg);
|
||||
typedef int (*mca_scoll_base_module_alltoall_fn_t)(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync,
|
||||
int alg);
|
||||
|
||||
struct mca_scoll_base_module_1_0_0_t {
|
||||
/** Collective modules all inherit from opal_object */
|
||||
@ -149,13 +156,14 @@ struct mca_scoll_base_module_1_0_0_t {
|
||||
mca_scoll_base_module_broadcast_fn_t scoll_broadcast;
|
||||
mca_scoll_base_module_collect_fn_t scoll_collect;
|
||||
mca_scoll_base_module_reduce_fn_t scoll_reduce;
|
||||
mca_scoll_base_module_alltoall_fn_t scoll_alltoall;
|
||||
mca_scoll_base_module_enable_1_0_0_fn_t scoll_module_enable;
|
||||
};
|
||||
typedef struct mca_scoll_base_module_1_0_0_t mca_scoll_base_module_1_0_0_t;
|
||||
|
||||
/** Per guidence in mca.h, use the unversioned struct name if you just
|
||||
/** Per guidance in mca.h, use the unversioned struct name if you just
|
||||
want to always keep up with the most recent version of the
|
||||
interace. */
|
||||
interface. */
|
||||
typedef struct mca_scoll_base_module_1_0_0_t mca_scoll_base_module_t;
|
||||
OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(mca_scoll_base_module_t);
|
||||
|
||||
@ -171,7 +179,7 @@ OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(mca_scoll_base_module_t);
|
||||
/*
|
||||
* Collectives group cache structure
|
||||
*
|
||||
* Collectives gorup cache structure, used to find functions to
|
||||
* Collectives group cache structure, used to find functions to
|
||||
* implement collective algorithms and their associated modules.
|
||||
*/
|
||||
struct mca_scoll_base_group_scoll_t {
|
||||
@ -183,6 +191,8 @@ struct mca_scoll_base_group_scoll_t {
|
||||
mca_scoll_base_module_1_0_0_t *scoll_collect_module;
|
||||
mca_scoll_base_module_reduce_fn_t scoll_reduce;
|
||||
mca_scoll_base_module_1_0_0_t *scoll_reduce_module;
|
||||
mca_scoll_base_module_alltoall_fn_t scoll_alltoall;
|
||||
mca_scoll_base_module_1_0_0_t *scoll_alltoall_module;
|
||||
};
|
||||
typedef struct mca_scoll_base_group_scoll_t mca_scoll_base_group_scoll_t;
|
||||
|
||||
|
@ -31,6 +31,7 @@ OSHMEM_API_SOURCES = \
|
||||
shmem_get.c \
|
||||
shmem_broadcast.c \
|
||||
shmem_collect.c \
|
||||
shmem_alltoall.c \
|
||||
shmem_ptr.c \
|
||||
shmem_pe_accessible.c \
|
||||
shmem_addr_accessible.c \
|
||||
|
@ -41,6 +41,7 @@ OSHMEM_API_SOURCES = \
|
||||
pshmem_put.c \
|
||||
pshmem_g.c \
|
||||
pshmem_get.c \
|
||||
pshmem_alltoall.c \
|
||||
pshmem_broadcast.c \
|
||||
pshmem_collect.c \
|
||||
pshmem_ptr.c \
|
||||
|
@ -360,6 +360,14 @@
|
||||
#define shmemx_int32_prod_to_all pshmemx_int32_prod_to_all
|
||||
#define shmemx_int64_prod_to_all pshmemx_int64_prod_to_all
|
||||
|
||||
/*
|
||||
* Alltoall routines
|
||||
*/
|
||||
#define shmem_alltoall32 pshmem_alltoall32
|
||||
#define shmem_alltoall64 pshmem_alltoall64
|
||||
#define shmem_alltoalls32 pshmem_alltoalls32
|
||||
#define shmem_alltoalls64 pshmem_alltoalls64
|
||||
|
||||
/*
|
||||
* Platform specific cache management routines
|
||||
*/
|
||||
|
137
oshmem/shmem/c/shmem_alltoall.c
Обычный файл
137
oshmem/shmem/c/shmem_alltoall.c
Обычный файл
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/include/shmem.h"
|
||||
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/proc/proc_group_cache.h"
|
||||
|
||||
static void _shmem_alltoall(void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
int PE_start,
|
||||
int logPE_stride,
|
||||
int PE_size,
|
||||
long *pSync);
|
||||
|
||||
/*
 * Expands to the definition of the contiguous alltoall entry point
 * shmem<name>. The generated function runs the runtime checks on the
 * initialization state and on both buffer addresses, then forwards to
 * _shmem_alltoall() with both strides fixed to 1 and the supplied
 * element_size.
 */
#define SHMEM_TYPE_ALLTOALL(name, element_size)                          \
    void shmem##name(void *target, const void *source, size_t nelems,    \
                     int PE_start, int logPE_stride, int PE_size,        \
                     long *pSync)                                        \
    {                                                                    \
        RUNTIME_CHECK_INIT();                                            \
        RUNTIME_CHECK_ADDR(target);                                      \
        RUNTIME_CHECK_ADDR(source);                                      \
                                                                         \
        _shmem_alltoall(target, source, 1, 1, nelems, element_size,      \
                        PE_start, logPE_stride, PE_size, pSync);         \
    }
|
||||
|
||||
/*
 * Expands to the definition of the strided alltoall entry point
 * shmem<name>. The generated function runs the runtime checks on the
 * initialization state and on both buffer addresses, then forwards to
 * _shmem_alltoall() with the caller's dst / sst strides and the supplied
 * element_size.
 */
#define SHMEM_TYPE_ALLTOALLS(name, element_size)                         \
    void shmem##name(void *target, const void *source,                   \
                     ptrdiff_t dst, ptrdiff_t sst, size_t nelems,        \
                     int PE_start, int logPE_stride, int PE_size,        \
                     long *pSync)                                        \
    {                                                                    \
        RUNTIME_CHECK_INIT();                                            \
        RUNTIME_CHECK_ADDR(target);                                      \
        RUNTIME_CHECK_ADDR(source);                                      \
                                                                         \
        _shmem_alltoall(target, source, dst, sst, nelems, element_size,  \
                        PE_start, logPE_stride, PE_size, pSync);         \
    }
|
||||
|
||||
static void _shmem_alltoall(void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
int PE_start,
|
||||
int logPE_stride,
|
||||
int PE_size,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
oshmem_group_t* group = NULL;
|
||||
|
||||
if ((0 <= PE_start) && (0 <= logPE_stride)) {
|
||||
/* Create group basing PE_start, logPE_stride and PE_size */
|
||||
#if OSHMEM_GROUP_CACHE_ENABLED == 0
|
||||
group = oshmem_proc_group_create(PE_start, (1 << logPE_stride), PE_size);
|
||||
if (!group)
|
||||
rc = OSHMEM_ERROR;
|
||||
#else
|
||||
group = find_group_in_cache(PE_start, logPE_stride, PE_size);
|
||||
if (!group) {
|
||||
group = oshmem_proc_group_create(PE_start,
|
||||
(1 << logPE_stride),
|
||||
PE_size);
|
||||
if (!group) {
|
||||
rc = OSHMEM_ERROR;
|
||||
} else {
|
||||
cache_group(group, PE_start, logPE_stride, PE_size);
|
||||
}
|
||||
}
|
||||
#endif /* OSHMEM_GROUP_CACHE_ENABLED */
|
||||
|
||||
/* Collective operation call */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
/* Call collective alltoall operation */
|
||||
rc = group->g_scoll.scoll_alltoall(group,
|
||||
target,
|
||||
source,
|
||||
dst,
|
||||
sst,
|
||||
nelems,
|
||||
element_size,
|
||||
pSync,
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
#if OSHMEM_GROUP_CACHE_ENABLED == 0
|
||||
if ( rc == OSHMEM_SUCCESS ) {
|
||||
oshmem_proc_group_destroy(group);
|
||||
}
|
||||
#endif /* OSHMEM_GROUP_CACHE_ENABLED */
|
||||
}
|
||||
}
|
||||
|
||||
#if OSHMEM_PROFILING
|
||||
#include "oshmem/include/pshmem.h"
|
||||
#pragma weak shmem_alltoall32 = pshmem_alltoall32
|
||||
#pragma weak shmem_alltoall64 = pshmem_alltoall64
|
||||
#pragma weak shmem_alltoalls32 = pshmem_alltoalls32
|
||||
#pragma weak shmem_alltoalls64 = pshmem_alltoalls64
|
||||
#include "oshmem/shmem/c/profile/defines.h"
|
||||
#endif
|
||||
|
||||
SHMEM_TYPE_ALLTOALL(_alltoall32, sizeof(uint32_t))
|
||||
SHMEM_TYPE_ALLTOALL(_alltoall64, sizeof(uint64_t))
|
||||
SHMEM_TYPE_ALLTOALLS(_alltoalls32, sizeof(uint32_t))
|
||||
SHMEM_TYPE_ALLTOALLS(_alltoalls64, sizeof(uint64_t))
|
@ -122,6 +122,7 @@ liboshmem_fortran_la_SOURCES += \
|
||||
shmem_prod_to_all_f.c \
|
||||
shmem_collect_f.c \
|
||||
shmem_broadcast_f.c \
|
||||
shmem_alltoall_f.c \
|
||||
shmem_lock_f.c \
|
||||
shmem_cache_f.c \
|
||||
shmem_int4_inc_f.c \
|
||||
|
@ -114,6 +114,7 @@ nodist_liboshmem_fortran_pshmem_la_SOURCES = \
|
||||
pshmem_prod_to_all_f.c \
|
||||
pshmem_collect_f.c \
|
||||
pshmem_broadcast_f.c \
|
||||
pshmem_alltoall_f.c \
|
||||
pshmem_lock_f.c \
|
||||
pshmem_cache_f.c \
|
||||
pshmem_int4_inc_f.c \
|
||||
|
@ -86,6 +86,22 @@
|
||||
#define shmem_broadcast64_ pshmem_broadcast64_
|
||||
#define shmem_broadcast64__ pshmem_broadcast64__
|
||||
|
||||
#define SHMEM_ALLTOALL32 PSHMEM_ALLTOALL32
|
||||
#define shmem_alltoall32_ pshmem_alltoall32_
|
||||
#define shmem_alltoall32__ pshmem_alltoall32__
|
||||
|
||||
#define SHMEM_ALLTOALL64 PSHMEM_ALLTOALL64
|
||||
#define shmem_alltoall64_ pshmem_alltoall64_
|
||||
#define shmem_alltoall64__ pshmem_alltoall64__
|
||||
|
||||
#define SHMEM_ALLTOALLS32 PSHMEM_ALLTOALLS32
|
||||
#define shmem_alltoalls32_ pshmem_alltoalls32_
|
||||
#define shmem_alltoalls32__ pshmem_alltoalls32__
|
||||
|
||||
#define SHMEM_ALLTOALLS64 PSHMEM_ALLTOALLS64
|
||||
#define shmem_alltoalls64_ pshmem_alltoalls64_
|
||||
#define shmem_alltoalls64__ pshmem_alltoalls64__
|
||||
|
||||
#define SHMEM_SET_CACHE_INV PSHMEM_SET_CACHE_INV
|
||||
#define shmem_set_cache_inv_ pshmem_set_cache_inv_
|
||||
#define shmem_set_cache_inv__ pshmem_set_cache_inv__
|
||||
|
@ -185,6 +185,10 @@ PN (void, pshmem_broadcast4, PSHMEM_BROADCAST4, (FORTRAN_POINTER_T target, FORTR
|
||||
PN (void, pshmem_broadcast8, PSHMEM_BROADCAST8, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, pshmem_broadcast32, PSHMEM_BROADCAST32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, pshmem_broadcast64, PSHMEM_BROADCAST64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, pshmem_alltoall32, PSHMEM_ALLTOALL32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, pshmem_alltoall64, PSHMEM_ALLTOALL64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, pshmem_alltoalls32, PSHMEM_ALLTOALLS32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, pshmem_alltoalls64, PSHMEM_ALLTOALLS64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, pshmem_set_lock, PSHMEM_SET_LOCK, (FORTRAN_POINTER_T lock));
|
||||
PN (void, pshmem_clear_lock, PSHMEM_CLEAR_LOCK, (FORTRAN_POINTER_T lock));
|
||||
PN (MPI_Fint, pshmem_test_lock, PSHMEM_TEST_LOCK, (FORTRAN_POINTER_T lock));
|
||||
|
@ -189,6 +189,10 @@ PN (void, shmem_broadcast4, SHMEM_BROADCAST4, (FORTRAN_POINTER_T target, FORTRAN
|
||||
PN (void, shmem_broadcast8, SHMEM_BROADCAST8, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, shmem_broadcast32, SHMEM_BROADCAST32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, shmem_broadcast64, SHMEM_BROADCAST64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, shmem_alltoall32, SHMEM_ALLTOALL32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, shmem_alltoall64, SHMEM_ALLTOALL64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, shmem_alltoalls32, SHMEM_ALLTOALLS32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, shmem_alltoalls64, SHMEM_ALLTOALLS64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||
PN (void, shmem_set_lock, SHMEM_SET_LOCK, (FORTRAN_POINTER_T lock));
|
||||
PN (void, shmem_clear_lock, SHMEM_CLEAR_LOCK, (FORTRAN_POINTER_T lock));
|
||||
PN (MPI_Fint, shmem_test_lock, SHMEM_TEST_LOCK, (FORTRAN_POINTER_T lock));
|
||||
|
191
oshmem/shmem/fortran/shmem_alltoall_f.c
Обычный файл
191
oshmem/shmem/fortran/shmem_alltoall_f.c
Обычный файл
@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "oshmem/shmem/fortran/bindings.h"
|
||||
#include "oshmem/include/shmem.h"
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/proc/proc_group_cache.h"
|
||||
#include "oshmem/op/op.h"
|
||||
|
||||
#if OSHMEM_PROFILING
|
||||
#include "oshmem/shmem/fortran/profile/pbindings.h"
|
||||
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALL32, shmem_alltoall32)
|
||||
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALL64, shmem_alltoall64)
|
||||
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALLS32, shmem_alltoalls32)
|
||||
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALLS64, shmem_alltoalls64)
|
||||
#include "oshmem/shmem/fortran/profile/defines.h"
|
||||
#endif
|
||||
|
||||
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||
SHMEM_ALLTOALL32,
|
||||
shmem_alltoall32_,
|
||||
shmem_alltoall32__,
|
||||
shmem_alltoall32_f,
|
||||
(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync),
|
||||
(target, source, nlong, PE_start, logPE_stride, PE_size, pSync))
|
||||
|
||||
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||
SHMEM_ALLTOALL64,
|
||||
shmem_alltoall64_,
|
||||
shmem_alltoall64__,
|
||||
shmem_alltoall64_f,
|
||||
(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync),
|
||||
(target, source, nlong, PE_start, logPE_stride, PE_size, pSync))
|
||||
|
||||
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||
SHMEM_ALLTOALLS32,
|
||||
shmem_alltoalls32_,
|
||||
shmem_alltoalls32__,
|
||||
shmem_alltoalls32_f,
|
||||
(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync),
|
||||
(target, source, dst, sst, nlong, PE_start, logPE_stride, PE_size, pSync))
|
||||
|
||||
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||
SHMEM_ALLTOALLS64,
|
||||
shmem_alltoalls64_,
|
||||
shmem_alltoalls64__,
|
||||
shmem_alltoalls64_f,
|
||||
(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync),
|
||||
(target, source, dst, sst, nlong, PE_start, logPE_stride, PE_size, pSync))
|
||||
|
||||
/*
 * Generates the Fortran-callable function F_NAME implementing a contiguous
 * alltoall.
 *
 * F_NAME                     - name of the generated function
 * T_NAME                     - expression yielding an oshmem_op_t*; only its
 *                              dt_size field is used, as the element size
 * OSHMEM_GROUP_CACHE_ENABLED - 0 to create and destroy a group per call,
 *                              non-zero to look the group up in (and add it
 *                              to) the group cache
 *
 * The generated function converts the Fortran integer arguments, obtains the
 * group described by PE_start / logPE_stride / PE_size and calls the group's
 * scoll_alltoall handler with both strides set to 1. A group that could not
 * be created is never passed to cache_group().
 */
#define SHMEM_ALLTOALL(F_NAME, T_NAME, OSHMEM_GROUP_CACHE_ENABLED) void F_NAME(FORTRAN_POINTER_T target, \
    FORTRAN_POINTER_T source, \
    MPI_Fint *nlong,\
    MPI_Fint *PE_start, \
    MPI_Fint *logPE_stride, \
    MPI_Fint *PE_size, \
    FORTRAN_POINTER_T pSync)\
{\
    int rc = OSHMEM_SUCCESS;\
    oshmem_group_t* group = NULL;\
    {\
        /* Create group basing PE_start, logPE_stride and PE_size */\
        if (OSHMEM_GROUP_CACHE_ENABLED == 0)\
        {\
            group = oshmem_proc_group_create(OMPI_FINT_2_INT(*PE_start), \
                (1 << OMPI_FINT_2_INT(*logPE_stride)), \
                OMPI_FINT_2_INT(*PE_size));\
            if (!group)\
            {\
                rc = OSHMEM_ERROR;\
            }\
        }\
        else\
        {\
            group = find_group_in_cache(OMPI_FINT_2_INT(*PE_start),\
                OMPI_FINT_2_INT(*logPE_stride),\
                OMPI_FINT_2_INT(*PE_size));\
            if (!group)\
            {\
                group = oshmem_proc_group_create(OMPI_FINT_2_INT(*PE_start), \
                    (1 << OMPI_FINT_2_INT(*logPE_stride)), \
                    OMPI_FINT_2_INT(*PE_size));\
                if (!group)\
                {\
                    rc = OSHMEM_ERROR;\
                }\
                else\
                {\
                    /* Only a successfully created group may be cached */\
                    cache_group(group,OMPI_FINT_2_INT(*PE_start),\
                        OMPI_FINT_2_INT(*logPE_stride),\
                        OMPI_FINT_2_INT(*PE_size));\
                }\
            }\
        } /* OSHMEM_GROUP_CACHE_ENABLED */\
        /* Collective operation call */\
        if ( rc == OSHMEM_SUCCESS )\
        {\
            oshmem_op_t* op = T_NAME;\
            \
            /* Call collective alltoall operation */\
            rc = group->g_scoll.scoll_alltoall( group, \
                FPTR_2_VOID_PTR(target), \
                FPTR_2_VOID_PTR(source), \
                1, \
                1, \
                OMPI_FINT_2_INT(*nlong), \
                op->dt_size, \
                FPTR_2_VOID_PTR(pSync), SCOLL_DEFAULT_ALG );\
        }\
        if (OSHMEM_GROUP_CACHE_ENABLED == 0) \
        {\
            if ( group )\
            {\
                oshmem_proc_group_destroy(group);\
            }\
        } /* OSHMEM_GROUP_CACHE_ENABLED */\
    }\
}
|
||||
|
||||
/*
 * SHMEM_ALLTOALLS(F_NAME, T_NAME, OSHMEM_GROUP_CACHE_ENABLED)
 *
 * Expands to the definition of a Fortran entry point named F_NAME for the
 * strided alltoall collective.
 *
 *   F_NAME  - name of the generated function.
 *   T_NAME  - expression yielding an oshmem_op_t*; only its dt_size field is
 *             read and forwarded to the collective.
 *   OSHMEM_GROUP_CACHE_ENABLED
 *           - 0: a group is created for this call and destroyed afterwards;
 *             non-zero: the group is looked up in the cache and, on a miss,
 *             created and cached (and therefore not destroyed here).
 *
 * The generated function receives its integer arguments (including the
 * strides dst and sst) by reference (MPI_Fint *) and returns void, so a
 * failure recorded in rc is not reported to the caller; it only prevents the
 * collective from being invoked.
 *
 * The group is created with a stride of (1 << logPE_stride), while the cache
 * is keyed on logPE_stride itself.
 */
#define SHMEM_ALLTOALLS(F_NAME, T_NAME, OSHMEM_GROUP_CACHE_ENABLED) void F_NAME(FORTRAN_POINTER_T target, \
    FORTRAN_POINTER_T source, \
    MPI_Fint *dst, \
    MPI_Fint *sst, \
    MPI_Fint *nlong, \
    MPI_Fint *PE_start, \
    MPI_Fint *logPE_stride, \
    MPI_Fint *PE_size, \
    FORTRAN_POINTER_T pSync) \
{ \
    int rc = OSHMEM_SUCCESS; \
    oshmem_group_t *group = NULL; \
    \
    /* Create group basing PE_start, logPE_stride and PE_size */ \
    if (OSHMEM_GROUP_CACHE_ENABLED == 0) { \
        group = oshmem_proc_group_create(OMPI_FINT_2_INT(*PE_start), \
                                         (1 << OMPI_FINT_2_INT(*logPE_stride)), \
                                         OMPI_FINT_2_INT(*PE_size)); \
        if (!group) { \
            rc = OSHMEM_ERROR; \
        } \
    } else { \
        group = find_group_in_cache(OMPI_FINT_2_INT(*PE_start), \
                                    OMPI_FINT_2_INT(*logPE_stride), \
                                    OMPI_FINT_2_INT(*PE_size)); \
        if (!group) { \
            group = oshmem_proc_group_create(OMPI_FINT_2_INT(*PE_start), \
                                             (1 << OMPI_FINT_2_INT(*logPE_stride)), \
                                             OMPI_FINT_2_INT(*PE_size)); \
            if (group) { \
                /* Only a successfully created group may enter the cache */ \
                cache_group(group, OMPI_FINT_2_INT(*PE_start), \
                            OMPI_FINT_2_INT(*logPE_stride), \
                            OMPI_FINT_2_INT(*PE_size)); \
            } else { \
                rc = OSHMEM_ERROR; \
            } \
        } \
    } /* OSHMEM_GROUP_CACHE_ENABLED */ \
    \
    /* Collective operation call */ \
    if (rc == OSHMEM_SUCCESS) { \
        oshmem_op_t *op = T_NAME; \
        \
        /* Call collective alltoall operation with the caller's strides */ \
        rc = group->g_scoll.scoll_alltoall(group, \
                                           FPTR_2_VOID_PTR(target), \
                                           FPTR_2_VOID_PTR(source), \
                                           OMPI_FINT_2_INT(*dst), \
                                           OMPI_FINT_2_INT(*sst), \
                                           OMPI_FINT_2_INT(*nlong), \
                                           op->dt_size, \
                                           FPTR_2_VOID_PTR(pSync), SCOLL_DEFAULT_ALG); \
    } \
    \
    /* A per-call group is owned by this function; a cached one is not */ \
    if (OSHMEM_GROUP_CACHE_ENABLED == 0) { \
        if (group) { \
            oshmem_proc_group_destroy(group); \
        } \
    } /* OSHMEM_GROUP_CACHE_ENABLED */ \
}
|
||||
|
||||
/*
 * Instantiate the four Fortran entry points. The op object passed as the
 * second argument is used by the macros only to read its dt_size field;
 * presumably the fint4/fint8 ops carry 4- and 8-byte element sizes matching
 * the 32/64 suffixes -- TODO confirm against the op definitions.
 */
SHMEM_ALLTOALL(shmem_alltoall32_f, oshmem_op_prod_fint4, OSHMEM_GROUP_CACHE_ENABLED)
|
||||
SHMEM_ALLTOALL(shmem_alltoall64_f, oshmem_op_prod_fint8, OSHMEM_GROUP_CACHE_ENABLED)
|
||||
SHMEM_ALLTOALLS(shmem_alltoalls32_f, oshmem_op_prod_fint4, OSHMEM_GROUP_CACHE_ENABLED)
|
||||
SHMEM_ALLTOALLS(shmem_alltoalls64_f, oshmem_op_prod_fint8, OSHMEM_GROUP_CACHE_ENABLED)
|
@ -132,6 +132,10 @@ shmem_api_man_pages = \
|
||||
shmem/man/man3/shmem_quiet.3 \
|
||||
shmem/man/man3/shmem_broadcast32.3 \
|
||||
shmem/man/man3/shmem_broadcast64.3 \
|
||||
shmem/man/man3/shmem_alltoall32.3 \
|
||||
shmem/man/man3/shmem_alltoall64.3 \
|
||||
shmem/man/man3/shmem_alltoalls32.3 \
|
||||
shmem/man/man3/shmem_alltoalls64.3 \
|
||||
shmem/man/man3/shmem_collect32.3 \
|
||||
shmem/man/man3/shmem_collect64.3 \
|
||||
shmem/man/man3/shmem_fcollect32.3 \
|
||||
|
226
oshmem/shmem/man/man3/shmem_alltoall32.3in
Обычный файл
226
oshmem/shmem/man/man3/shmem_alltoall32.3in
Обычный файл
@ -0,0 +1,226 @@
|
||||
.\" -*- nroff -*-
|
||||
.\" Copyright (c) 2016 Mellanox Technologies, Inc.
|
||||
.\" $COPYRIGHT$
|
||||
.de Vb
|
||||
.ft CW
|
||||
.nf
|
||||
..
|
||||
.de Ve
|
||||
.ft R
|
||||
|
||||
.fi
|
||||
..
|
||||
.TH "SHMEM\\_ALLTOALL" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
|
||||
.SH NAME
|
||||
|
||||
\fIshmem_alltoall32\fP(3),
|
||||
\fIshmem_alltoall64\fP(3),
|
||||
\fIshmem_alltoalls32\fP(3),
|
||||
\fIshmem_alltoalls64\fP(3)
|
||||
\- collective routine where each PE exchanges a fixed amount of data with all
|
||||
other PEs in the Active set
|
||||
.SH SYNOPSIS
|
||||
|
||||
C or C++:
|
||||
.Vb
|
||||
#include <mpp/shmem.h>
|
||||
|
||||
void shmem_alltoall32(void *target, const void *source,
|
||||
size_t nelems, int PE_start, int logPE_stride,
|
||||
int PE_size, long *pSync);
|
||||
|
||||
void shmem_alltoall64(void *target, const void *source,
|
||||
size_t nelems, int PE_start, int logPE_stride,
|
||||
int PE_size, long *pSync);
|
||||
|
||||
void shmem_alltoalls32(void *target, const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems, int PE_start, int logPE_stride,
|
||||
int PE_size, long *pSync);
|
||||
|
||||
void shmem_alltoalls64(void *target, const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems, int PE_start, int logPE_stride,
|
||||
int PE_size, long *pSync);
|
||||
.Ve
|
||||
Fortran:
|
||||
.Vb
|
||||
INCLUDE "mpp/shmem.fh"
|
||||
|
||||
INTEGER dst, sst, nelems, PE_start, logPE_stride, PE_size
|
||||
INTEGER pSync(SHMEM_ALLTOALL_SYNC_SIZE)
|
||||
|
||||
CALL SHMEM_ALLTOALL32(target, source, nelems,
|
||||
& PE_start, logPE_stride, PE_size, pSync)
|
||||
|
||||
CALL SHMEM_ALLTOALL64(target, source, nelems,
|
||||
& PE_start, logPE_stride, PE_size, pSync)
|
||||
|
||||
CALL SHMEM_ALLTOALLS32(target, source, dst, sst, nelems,
|
||||
& PE_start, logPE_stride, PE_size, pSync)
|
||||
|
||||
CALL SHMEM_ALLTOALLS64(target, source, dst, sst, nelems,
|
||||
& PE_start, logPE_stride, PE_size, pSync)
|
||||
.Ve
|
||||
.SH DESCRIPTION
|
||||
|
||||
.PP
|
||||
The shmem_alltoalls routines are collective routines. Each PE in the Active set exchanges nelems strided
|
||||
data elements of size 32 bits (for shmem_alltoalls32) or 64 bits (for shmem_alltoalls64) with all other PEs
|
||||
in the set. Both strides, dst and sst, must be greater than or equal to 1. The sst*jth block sent from PE i to
|
||||
PE j is placed in the dst*ith block of the dest data object on PE j.
|
||||
As with all OpenSHMEM collective routines, these routines assume that only PEs in the Active set call the
|
||||
routine. If a PE not in the Active set calls an OpenSHMEM collective routine, undefined behavior results.
|
||||
The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on all PEs in
|
||||
the Active set. The same dest and source data objects, and the same pSync work array must be passed to all
|
||||
PEs in the Active set.
|
||||
Before any PE calls a shmem_alltoalls routine, the following conditions must exist (synchronization via
|
||||
a barrier or some other method is often needed to ensure this): The pSync array on all PEs in the Active set
|
||||
is not still in use from a prior call to a shmem_alltoalls routine. The dest data object on all PEs in the
|
||||
Active set is ready to accept the shmem_alltoalls data.
|
||||
Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest symmetric data
|
||||
object is completely updated and the data has been copied out of the source data object. The values in the
|
||||
pSync array are restored to the original values.
|
||||
.PP
|
||||
The arguments are as follows:
|
||||
.TP
|
||||
A symmetric data object with one of the following data types:
|
||||
.RS
|
||||
.TP
|
||||
\fBshmem_alltoall32\fP: Any noncharacter type that
|
||||
has an element size of 32 bits. No Fortran derived types or C/C++ structures are allowed.
|
||||
.TP
|
||||
\fBshmem_alltoall64\fP: Any noncharacter type that has an element size
|
||||
of 64 bits. No Fortran derived types or C/C++ structures are allowed.
|
||||
.RE
|
||||
.RS
|
||||
.PP
|
||||
.RE
|
||||
target
|
||||
A symmetric data object large enough to receive the combined total of
|
||||
nelems elements from each PE in the Active set.
|
||||
.TP
|
||||
source
|
||||
A symmetric data object that contains nelems elements of data for each
|
||||
PE in the Active set, ordered according to destination PE.
|
||||
.TP
|
||||
dst
|
||||
The stride between consecutive elements of the dest data object. The
|
||||
stride is scaled by the element size. A value of 1 indicates contiguous
|
||||
data. dst must be of type ptrdiff_t. If you are using Fortran, it must be
|
||||
a default integer value.
|
||||
.TP
|
||||
sst
|
||||
The stride between consecutive elements of the source data object. The
|
||||
stride is scaled by the element size. A value of 1 indicates contiguous
|
||||
data. sst must be of type ptrdiff_t. If you are using Fortran, it must be a
|
||||
default integer value.
|
||||
.TP
|
||||
nelems
|
||||
The number of elements to exchange for each PE. nelems must be of
|
||||
type size_t for C/C++. If you are using Fortran, it must be a default
|
||||
integer value.
|
||||
.TP
|
||||
PE_start
|
||||
The lowest virtual PE number of the active set of PEs. PE_start must be of
|
||||
type integer. If you are using Fortran, it must be a default integer value.
|
||||
.TP
|
||||
logPE_stride
|
||||
The log (base 2) of the stride between consecutive virtual PE numbers in
|
||||
the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
|
||||
default integer value.
|
||||
.TP
|
||||
PE_size
|
||||
The number of PEs in the active set. PE_size must be of type integer. If you
|
||||
are using Fortran, it must be a default integer value.
|
||||
.PP
|
||||
.TP
|
||||
pSync
|
||||
A symmetric work array. In C/C++, pSync must be of type long and size
|
||||
_SHMEM_ALLTOALL_SYNC_SIZE.
|
||||
In Fortran, pSync must be of type integer and size SHMEM_ALLTOALL_SYNC_SIZE. Every
|
||||
element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++)
|
||||
or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
|
||||
the alltoall routine.
|
||||
.PP
|
||||
Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest
|
||||
symmetric data object is completely updated and the data has been copied out of the source
|
||||
data object. The values in the pSync array are restored to the original values.
|
||||
.PP
|
||||
The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on
|
||||
all PEs in the active set. The same target and source data objects and the same pSync work
|
||||
array must be passed to all PEs in the active set.
|
||||
.PP
|
||||
Before any PE calls an alltoall routine, you must ensure that the following conditions exist
|
||||
(synchronization via a barrier or some other method is often needed to ensure this): The
|
||||
pSync array on all PEs in the active set is not still in use from a prior call to an alltoall
|
||||
routine. The target array on all PEs in the active set is ready to accept the alltoall data.
|
||||
.SH NOTES
|
||||
|
||||
The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&.
|
||||
.PP
|
||||
All SHMEM alltoall routines restore pSync to its original contents. Multiple calls to SHMEM
|
||||
routines that use the same pSync array do not require that pSync be reinitialized after the
|
||||
first call.
|
||||
.PP
|
||||
You must ensure that the pSync array is not being updated by any PE in the active set
|
||||
while any of the PEs participates in processing of a SHMEM alltoall routine. Be careful to
|
||||
avoid these situations: If the pSync array is initialized at run time, some type of
|
||||
synchronization is needed to ensure that all PEs in the working set have initialized pSync
|
||||
before any of them enter a SHMEM routine called with the pSync synchronization array. A
|
||||
pSync array may be reused on a subsequent SHMEM alltoall routine only if none of the PEs
|
||||
in the active set are still processing a prior SHMEM alltoall routine call that used the same
|
||||
pSync array. In general, this can be ensured only by doing some type of synchronization.
|
||||
However, in the special case of SHMEM routines being called with the same active set, you
|
||||
can allocate two pSync arrays and alternate between them on successive calls.
|
||||
.PP
|
||||
.SH EXAMPLES
|
||||
|
||||
.PP
|
||||
C/C++ example:
|
||||
.Vb
|
||||
#include <shmem.h>
|
||||
#include <stdio.h>
|
||||
long pSync[SHMEM_ALLTOALL_SYNC_SIZE];
|
||||
int main(void)
|
||||
{
|
||||
int64_t *source, *dest;
|
||||
int i, count, pe;
|
||||
shmem_init();
|
||||
count = 2;
|
||||
dest = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t));
|
||||
source = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t));
|
||||
/* assign source values */
|
||||
for (pe=0; pe <shmem_n_pes(); pe++){
|
||||
for (i=0; i<count; i++){
|
||||
source[(pe*count)+i] = shmem_my_pe() + pe;
|
||||
dest[(pe*count)+i] = 9999;
|
||||
}
|
||||
}
|
||||
for (i=0; i< SHMEM_ALLTOALL_SYNC_SIZE; i++) {
|
||||
pSync[i] = SHMEM_SYNC_VALUE;
|
||||
}
|
||||
/* wait for all PEs to initialize pSync */
|
||||
shmem_barrier_all();
|
||||
/* alltoalls on all PES */
|
||||
shmem_alltoalls64(dest, source, 1, 1, count, 0, 0, shmem_n_pes(), pSync);
|
||||
/* verify results */
|
||||
for (pe=0; pe<shmem_n_pes(); pe++) {
|
||||
for (i=0; i<count; i++){
|
||||
if (dest[(pe*count)+i] != shmem_my_pe() + pe) {
|
||||
printf("[%d] ERROR: dest[%d]=%ld, should be %d\n",
|
||||
shmem_my_pe(),(pe*count)+i,dest[(pe*count)+i],
|
||||
shmem_my_pe() + pe);
|
||||
}
|
||||
}
|
||||
}
|
||||
shmem_barrier_all();
|
||||
shmem_free(dest);
|
||||
shmem_free(source);
|
||||
shmem_finalize();
|
||||
return 0;
|
||||
}
|
||||
.Ve
.PP
|
||||
.SH SEE ALSO
|
||||
|
||||
\fIintro_shmem\fP(3)
|
1
oshmem/shmem/man/man3/shmem_alltoall64.3in
Обычный файл
1
oshmem/shmem/man/man3/shmem_alltoall64.3in
Обычный файл
@ -0,0 +1 @@
|
||||
.so man3/shmem_alltoall32.3
|
1
oshmem/shmem/man/man3/shmem_alltoalls32.3in
Обычный файл
1
oshmem/shmem/man/man3/shmem_alltoalls32.3in
Обычный файл
@ -0,0 +1 @@
|
||||
.so man3/shmem_alltoall32.3
|
1
oshmem/shmem/man/man3/shmem_alltoalls64.3in
Обычный файл
1
oshmem/shmem/man/man3/shmem_alltoalls64.3in
Обычный файл
@ -0,0 +1 @@
|
||||
.so man3/shmem_alltoall32.3
|
Загрузка…
Ссылка в новой задаче
Block a user