Merge pull request #1478 from igor-ivanov/pr/oshmem-v1.3-alltoall
oshmem: Add alltoall
Этот коммит содержится в:
Коммит
1d8fbfefb0
@ -101,6 +101,7 @@ enum shmem_wait_ops {
|
|||||||
#define _SHMEM_BCAST_SYNC_SIZE (1 + _SHMEM_BARRIER_SYNC_SIZE)
|
#define _SHMEM_BCAST_SYNC_SIZE (1 + _SHMEM_BARRIER_SYNC_SIZE)
|
||||||
#define _SHMEM_COLLECT_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE)
|
#define _SHMEM_COLLECT_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE)
|
||||||
#define _SHMEM_REDUCE_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE)
|
#define _SHMEM_REDUCE_SYNC_SIZE (1 + _SHMEM_BCAST_SYNC_SIZE)
|
||||||
|
#define _SHMEM_ALLTOALL_SYNC_SIZE (1)
|
||||||
#define _SHMEM_REDUCE_MIN_WRKDATA_SIZE (1)
|
#define _SHMEM_REDUCE_MIN_WRKDATA_SIZE (1)
|
||||||
#define _SHMEM_SYNC_VALUE (-1)
|
#define _SHMEM_SYNC_VALUE (-1)
|
||||||
|
|
||||||
@ -108,6 +109,7 @@ enum shmem_wait_ops {
|
|||||||
#define SHMEM_BCAST_SYNC_SIZE _SHMEM_BCAST_SYNC_SIZE
|
#define SHMEM_BCAST_SYNC_SIZE _SHMEM_BCAST_SYNC_SIZE
|
||||||
#define SHMEM_COLLECT_SYNC_SIZE _SHMEM_COLLECT_SYNC_SIZE
|
#define SHMEM_COLLECT_SYNC_SIZE _SHMEM_COLLECT_SYNC_SIZE
|
||||||
#define SHMEM_REDUCE_SYNC_SIZE _SHMEM_REDUCE_SYNC_SIZE
|
#define SHMEM_REDUCE_SYNC_SIZE _SHMEM_REDUCE_SYNC_SIZE
|
||||||
|
#define SHMEM_ALLTOALL_SYNC_SIZE _SHMEM_ALLTOALL_SYNC_SIZE
|
||||||
#define SHMEM_REDUCE_MIN_WRKDATA_SIZE _SHMEM_REDUCE_MIN_WRKDATA_SIZE
|
#define SHMEM_REDUCE_MIN_WRKDATA_SIZE _SHMEM_REDUCE_MIN_WRKDATA_SIZE
|
||||||
#define SHMEM_SYNC_VALUE _SHMEM_SYNC_VALUE
|
#define SHMEM_SYNC_VALUE _SHMEM_SYNC_VALUE
|
||||||
|
|
||||||
@ -344,6 +346,11 @@ OSHMEM_DECLSPEC void shmem_collect32(void *target, const void *source, size_t nl
|
|||||||
OSHMEM_DECLSPEC void shmem_collect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
OSHMEM_DECLSPEC void shmem_collect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||||
OSHMEM_DECLSPEC void shmem_fcollect32(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
OSHMEM_DECLSPEC void shmem_fcollect32(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||||
OSHMEM_DECLSPEC void shmem_fcollect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
OSHMEM_DECLSPEC void shmem_fcollect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||||
|
OSHMEM_DECLSPEC void shmem_alltoall32(void *target, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||||
|
OSHMEM_DECLSPEC void shmem_alltoall64(void *target, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||||
|
OSHMEM_DECLSPEC void shmem_alltoalls32(void *target, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||||
|
OSHMEM_DECLSPEC void shmem_alltoalls64(void *target, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reduction routines
|
* Reduction routines
|
||||||
|
@ -46,6 +46,7 @@ static void scoll_base_module_construct(mca_scoll_base_module_t *m)
|
|||||||
m->scoll_broadcast = NULL;
|
m->scoll_broadcast = NULL;
|
||||||
m->scoll_collect = NULL;
|
m->scoll_collect = NULL;
|
||||||
m->scoll_reduce = NULL;
|
m->scoll_reduce = NULL;
|
||||||
|
m->scoll_alltoall = NULL;
|
||||||
m->scoll_module_enable = NULL;
|
m->scoll_module_enable = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,6 +118,22 @@ static int scoll_null_reduce(struct oshmem_group_t *group,
|
|||||||
return OSHMEM_SUCCESS;
|
return OSHMEM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int scoll_null_alltoall(struct oshmem_group_t *group,
|
||||||
|
void *target,
|
||||||
|
const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nlong,
|
||||||
|
long *pSync,
|
||||||
|
int alg)
|
||||||
|
{
|
||||||
|
if (oshmem_proc_group_is_member(group)) {
|
||||||
|
SCOLL_ERROR("internal error");
|
||||||
|
oshmem_shmem_abort(-1);
|
||||||
|
return OSHMEM_ERROR;
|
||||||
|
}
|
||||||
|
return OSHMEM_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Stuff for the OBJ interface
|
* Stuff for the OBJ interface
|
||||||
*/
|
*/
|
||||||
@ -160,6 +176,7 @@ int mca_scoll_base_group_unselect(struct oshmem_group_t * group)
|
|||||||
CLOSE(group, broadcast);
|
CLOSE(group, broadcast);
|
||||||
CLOSE(group, collect);
|
CLOSE(group, collect);
|
||||||
CLOSE(group, reduce);
|
CLOSE(group, reduce);
|
||||||
|
CLOSE(group, alltoall);
|
||||||
|
|
||||||
/* All done */
|
/* All done */
|
||||||
return OSHMEM_SUCCESS;
|
return OSHMEM_SUCCESS;
|
||||||
@ -184,6 +201,7 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
|||||||
group->g_scoll.scoll_broadcast = scoll_null_broadcast;
|
group->g_scoll.scoll_broadcast = scoll_null_broadcast;
|
||||||
group->g_scoll.scoll_collect = scoll_null_collect;
|
group->g_scoll.scoll_collect = scoll_null_collect;
|
||||||
group->g_scoll.scoll_reduce = scoll_null_reduce;
|
group->g_scoll.scoll_reduce = scoll_null_reduce;
|
||||||
|
group->g_scoll.scoll_alltoall = scoll_null_alltoall;
|
||||||
return OSHMEM_SUCCESS;
|
return OSHMEM_SUCCESS;
|
||||||
}
|
}
|
||||||
SCOLL_VERBOSE(10,
|
SCOLL_VERBOSE(10,
|
||||||
@ -206,10 +224,11 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
|||||||
if (OSHMEM_SUCCESS != ret) {
|
if (OSHMEM_SUCCESS != ret) {
|
||||||
mca_scoll_base_group_unselect(group);
|
mca_scoll_base_group_unselect(group);
|
||||||
} else {
|
} else {
|
||||||
|
COPY(avail->ac_module, group, barrier);
|
||||||
COPY(avail->ac_module, group, broadcast);
|
COPY(avail->ac_module, group, broadcast);
|
||||||
COPY(avail->ac_module, group, collect);
|
COPY(avail->ac_module, group, collect);
|
||||||
COPY(avail->ac_module, group, reduce);
|
COPY(avail->ac_module, group, reduce);
|
||||||
COPY(avail->ac_module, group, barrier);
|
COPY(avail->ac_module, group, alltoall);
|
||||||
}
|
}
|
||||||
OBJ_RELEASE(avail->ac_module);
|
OBJ_RELEASE(avail->ac_module);
|
||||||
OBJ_RELEASE(avail);
|
OBJ_RELEASE(avail);
|
||||||
@ -220,7 +239,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
|||||||
if ((NULL == group->g_scoll.scoll_barrier)
|
if ((NULL == group->g_scoll.scoll_barrier)
|
||||||
|| (NULL == group->g_scoll.scoll_broadcast)
|
|| (NULL == group->g_scoll.scoll_broadcast)
|
||||||
|| (NULL == group->g_scoll.scoll_collect)
|
|| (NULL == group->g_scoll.scoll_collect)
|
||||||
|| (NULL == group->g_scoll.scoll_reduce)) {
|
|| (NULL == group->g_scoll.scoll_reduce)
|
||||||
|
|| (NULL == group->g_scoll.scoll_alltoall)) {
|
||||||
mca_scoll_base_group_unselect(group);
|
mca_scoll_base_group_unselect(group);
|
||||||
return OSHMEM_ERR_NOT_FOUND;
|
return OSHMEM_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
@ -228,8 +248,8 @@ int mca_scoll_base_select(struct oshmem_group_t *group)
|
|||||||
return OSHMEM_SUCCESS;
|
return OSHMEM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int avail_coll_compare (opal_list_item_t **a,
|
static int avail_coll_compare(opal_list_item_t **a,
|
||||||
opal_list_item_t **b)
|
opal_list_item_t **b)
|
||||||
{
|
{
|
||||||
avail_com_t *acom = (avail_com_t *) *a;
|
avail_com_t *acom = (avail_com_t *) *a;
|
||||||
avail_com_t *bcom = (avail_com_t *) *b;
|
avail_com_t *bcom = (avail_com_t *) *b;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
#
|
#
|
||||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
# Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
@ -15,7 +15,8 @@ sources = \
|
|||||||
scoll_basic_barrier.c \
|
scoll_basic_barrier.c \
|
||||||
scoll_basic_broadcast.c \
|
scoll_basic_broadcast.c \
|
||||||
scoll_basic_collect.c \
|
scoll_basic_collect.c \
|
||||||
scoll_basic_reduce.c
|
scoll_basic_reduce.c \
|
||||||
|
scoll_basic_alltoall.c
|
||||||
|
|
||||||
|
|
||||||
# Make the output library in this directory, and name it either
|
# Make the output library in this directory, and name it either
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -77,6 +77,14 @@ int mca_scoll_basic_reduce(struct oshmem_group_t *group,
|
|||||||
long *pSync,
|
long *pSync,
|
||||||
void *pWrk,
|
void *pWrk,
|
||||||
int alg);
|
int alg);
|
||||||
|
int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
|
||||||
|
void *target,
|
||||||
|
const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nelems,
|
||||||
|
size_t element_size,
|
||||||
|
long *pSync,
|
||||||
|
int alg);
|
||||||
|
|
||||||
static inline unsigned int scoll_log2(unsigned long val)
|
static inline unsigned int scoll_log2(unsigned long val)
|
||||||
{
|
{
|
||||||
|
118
oshmem/mca/scoll/basic/scoll_basic_alltoall.c
Обычный файл
118
oshmem/mca/scoll/basic/scoll_basic_alltoall.c
Обычный файл
@ -0,0 +1,118 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016 Mellanox Technologies, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "oshmem_config.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "oshmem/constants.h"
|
||||||
|
#include "oshmem/op/op.h"
|
||||||
|
#include "oshmem/mca/spml/spml.h"
|
||||||
|
#include "oshmem/mca/scoll/scoll.h"
|
||||||
|
#include "oshmem/mca/scoll/base/base.h"
|
||||||
|
#include "scoll_basic.h"
|
||||||
|
|
||||||
|
static int _algorithm_simple(struct oshmem_group_t *group,
|
||||||
|
void *target,
|
||||||
|
const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nelems,
|
||||||
|
size_t element_size,
|
||||||
|
long *pSync);
|
||||||
|
|
||||||
|
int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
|
||||||
|
void *target,
|
||||||
|
const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nelems,
|
||||||
|
size_t element_size,
|
||||||
|
long *pSync,
|
||||||
|
int alg)
|
||||||
|
{
|
||||||
|
int rc = OSHMEM_SUCCESS;
|
||||||
|
|
||||||
|
/* Arguments validation */
|
||||||
|
if (!group) {
|
||||||
|
SCOLL_ERROR("Active set (group) of PE is not defined");
|
||||||
|
rc = OSHMEM_ERR_BAD_PARAM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if this PE is part of the group */
|
||||||
|
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
if (pSync) {
|
||||||
|
rc = _algorithm_simple(group,
|
||||||
|
target,
|
||||||
|
source,
|
||||||
|
dst,
|
||||||
|
sst,
|
||||||
|
nelems,
|
||||||
|
element_size,
|
||||||
|
pSync);
|
||||||
|
} else {
|
||||||
|
SCOLL_ERROR("Incorrect argument pSync");
|
||||||
|
rc = OSHMEM_ERR_BAD_PARAM;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Restore initial values */
|
||||||
|
SCOLL_VERBOSE(12,
|
||||||
|
"PE#%d Restore special synchronization array",
|
||||||
|
group->my_pe);
|
||||||
|
for (i = 0; pSync && (i < _SHMEM_ALLTOALL_SYNC_SIZE); i++) {
|
||||||
|
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int _algorithm_simple(struct oshmem_group_t *group,
|
||||||
|
void *target,
|
||||||
|
const void *source,
|
||||||
|
ptrdiff_t tst, ptrdiff_t sst,
|
||||||
|
size_t nelems,
|
||||||
|
size_t element_size,
|
||||||
|
long *pSync)
|
||||||
|
{
|
||||||
|
int rc = OSHMEM_SUCCESS;
|
||||||
|
int pe_cur;
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int k;
|
||||||
|
|
||||||
|
SCOLL_VERBOSE(14,
|
||||||
|
"[#%d] send data to all PE in the group",
|
||||||
|
group->my_pe);
|
||||||
|
j = oshmem_proc_group_find_id(group, group->my_pe);
|
||||||
|
for (i = 0; i < group->proc_count; i++) {
|
||||||
|
/* index permutation for better distribution of traffic */
|
||||||
|
k = (((j)+(i))%(group->proc_count));
|
||||||
|
pe_cur = oshmem_proc_pe(group->proc_array[k]);
|
||||||
|
rc = MCA_SPML_CALL(put(
|
||||||
|
(void *)((char *)target + j * tst * nelems * element_size),
|
||||||
|
nelems * element_size,
|
||||||
|
(void *)((char *)source + i * sst * nelems * element_size),
|
||||||
|
pe_cur));
|
||||||
|
if (OSHMEM_SUCCESS != rc) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for operation completion */
|
||||||
|
if (rc == OSHMEM_SUCCESS) {
|
||||||
|
SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe);
|
||||||
|
rc = BARRIER_FUNC(group,
|
||||||
|
(pSync + 1),
|
||||||
|
SCOLL_DEFAULT_ALG);
|
||||||
|
}
|
||||||
|
|
||||||
|
return rc;
|
||||||
|
}
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -52,9 +52,10 @@ mca_scoll_basic_query(struct oshmem_group_t *group, int *priority)
|
|||||||
module->super.scoll_broadcast = mca_scoll_basic_broadcast;
|
module->super.scoll_broadcast = mca_scoll_basic_broadcast;
|
||||||
module->super.scoll_collect = mca_scoll_basic_collect;
|
module->super.scoll_collect = mca_scoll_basic_collect;
|
||||||
module->super.scoll_reduce = mca_scoll_basic_reduce;
|
module->super.scoll_reduce = mca_scoll_basic_reduce;
|
||||||
|
module->super.scoll_alltoall = mca_scoll_basic_alltoall;
|
||||||
module->super.scoll_module_enable = mca_scoll_basic_enable;
|
module->super.scoll_module_enable = mca_scoll_basic_enable;
|
||||||
return &(module->super);
|
return &(module->super);
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL ;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -93,6 +93,8 @@ struct mca_scoll_fca_module_t {
|
|||||||
mca_scoll_base_module_t *previous_collect_module;
|
mca_scoll_base_module_t *previous_collect_module;
|
||||||
mca_scoll_base_module_reduce_fn_t previous_reduce;
|
mca_scoll_base_module_reduce_fn_t previous_reduce;
|
||||||
mca_scoll_base_module_t *previous_reduce_module;
|
mca_scoll_base_module_t *previous_reduce_module;
|
||||||
|
mca_scoll_base_module_alltoall_fn_t previous_alltoall;
|
||||||
|
mca_scoll_base_module_t *previous_alltoall_module;
|
||||||
};
|
};
|
||||||
typedef struct mca_scoll_fca_module_t mca_scoll_fca_module_t;
|
typedef struct mca_scoll_fca_module_t mca_scoll_fca_module_t;
|
||||||
OBJ_CLASS_DECLARATION(mca_scoll_fca_module_t);
|
OBJ_CLASS_DECLARATION(mca_scoll_fca_module_t);
|
||||||
|
@ -391,6 +391,7 @@ static int _save_coll_handlers(mca_scoll_fca_module_t *fca_module)
|
|||||||
FCA_SAVE_PREV_SCOLL_API(broadcast);
|
FCA_SAVE_PREV_SCOLL_API(broadcast);
|
||||||
FCA_SAVE_PREV_SCOLL_API(collect);
|
FCA_SAVE_PREV_SCOLL_API(collect);
|
||||||
FCA_SAVE_PREV_SCOLL_API(reduce);
|
FCA_SAVE_PREV_SCOLL_API(reduce);
|
||||||
|
FCA_SAVE_PREV_SCOLL_API(alltoall);
|
||||||
|
|
||||||
return OSHMEM_SUCCESS;
|
return OSHMEM_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -450,6 +451,7 @@ static void mca_scoll_fca_module_clear(mca_scoll_fca_module_t *fca_module)
|
|||||||
fca_module->previous_broadcast = NULL;
|
fca_module->previous_broadcast = NULL;
|
||||||
fca_module->previous_collect = NULL;
|
fca_module->previous_collect = NULL;
|
||||||
fca_module->previous_reduce = NULL;
|
fca_module->previous_reduce = NULL;
|
||||||
|
fca_module->previous_alltoall = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mca_scoll_fca_module_construct(mca_scoll_fca_module_t *fca_module)
|
static void mca_scoll_fca_module_construct(mca_scoll_fca_module_t *fca_module)
|
||||||
@ -465,6 +467,7 @@ static void mca_scoll_fca_module_destruct(mca_scoll_fca_module_t *fca_module)
|
|||||||
OBJ_RELEASE(fca_module->previous_broadcast_module);
|
OBJ_RELEASE(fca_module->previous_broadcast_module);
|
||||||
OBJ_RELEASE(fca_module->previous_collect_module);
|
OBJ_RELEASE(fca_module->previous_collect_module);
|
||||||
OBJ_RELEASE(fca_module->previous_reduce_module);
|
OBJ_RELEASE(fca_module->previous_reduce_module);
|
||||||
|
OBJ_RELEASE(fca_module->previous_alltoall_module);
|
||||||
if (fca_module->fca_comm)
|
if (fca_module->fca_comm)
|
||||||
_destroy_fca_comm(fca_module);
|
_destroy_fca_comm(fca_module);
|
||||||
free(fca_module->local_ranks);
|
free(fca_module->local_ranks);
|
||||||
@ -541,6 +544,7 @@ mca_scoll_fca_comm_query(struct oshmem_group_t *comm, int *priority)
|
|||||||
fca_module->super.scoll_broadcast =
|
fca_module->super.scoll_broadcast =
|
||||||
mca_scoll_fca_component.fca_enable_bcast ? mca_scoll_fca_broadcast :
|
mca_scoll_fca_component.fca_enable_bcast ? mca_scoll_fca_broadcast :
|
||||||
NULL;
|
NULL;
|
||||||
|
fca_module->super.scoll_alltoall = NULL;
|
||||||
|
|
||||||
*priority = mca_scoll_fca_component.fca_priority;
|
*priority = mca_scoll_fca_component.fca_priority;
|
||||||
module = &fca_module->super;
|
module = &fca_module->super;
|
||||||
|
@ -69,6 +69,8 @@ struct mca_scoll_mpi_module_t {
|
|||||||
mca_scoll_base_module_t *previous_barrier_module;
|
mca_scoll_base_module_t *previous_barrier_module;
|
||||||
mca_scoll_base_module_collect_fn_t previous_collect;
|
mca_scoll_base_module_collect_fn_t previous_collect;
|
||||||
mca_scoll_base_module_t *previous_collect_module;
|
mca_scoll_base_module_t *previous_collect_module;
|
||||||
|
mca_scoll_base_module_alltoall_fn_t previous_alltoall;
|
||||||
|
mca_scoll_base_module_t *previous_alltoall_module;
|
||||||
};
|
};
|
||||||
typedef struct mca_scoll_mpi_module_t mca_scoll_mpi_module_t;
|
typedef struct mca_scoll_mpi_module_t mca_scoll_mpi_module_t;
|
||||||
|
|
||||||
|
@ -29,6 +29,7 @@ static void mca_scoll_mpi_module_clear(mca_scoll_mpi_module_t *mpi_module)
|
|||||||
mpi_module->previous_broadcast = NULL;
|
mpi_module->previous_broadcast = NULL;
|
||||||
mpi_module->previous_reduce = NULL;
|
mpi_module->previous_reduce = NULL;
|
||||||
mpi_module->previous_collect = NULL;
|
mpi_module->previous_collect = NULL;
|
||||||
|
mpi_module->previous_alltoall = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mca_scoll_mpi_module_construct(mca_scoll_mpi_module_t *mpi_module)
|
static void mca_scoll_mpi_module_construct(mca_scoll_mpi_module_t *mpi_module)
|
||||||
@ -43,6 +44,7 @@ static void mca_scoll_mpi_module_destruct(mca_scoll_mpi_module_t *mpi_module)
|
|||||||
OBJ_RELEASE(mpi_module->previous_broadcast_module);
|
OBJ_RELEASE(mpi_module->previous_broadcast_module);
|
||||||
OBJ_RELEASE(mpi_module->previous_reduce_module);
|
OBJ_RELEASE(mpi_module->previous_reduce_module);
|
||||||
OBJ_RELEASE(mpi_module->previous_collect_module);
|
OBJ_RELEASE(mpi_module->previous_collect_module);
|
||||||
|
OBJ_RELEASE(mpi_module->previous_alltoall_module);
|
||||||
|
|
||||||
mca_scoll_mpi_module_clear(mpi_module);
|
mca_scoll_mpi_module_clear(mpi_module);
|
||||||
/* Free ompi_comm */
|
/* Free ompi_comm */
|
||||||
@ -68,6 +70,7 @@ static int mca_scoll_mpi_save_coll_handlers(mca_scoll_base_module_t *module, osh
|
|||||||
MPI_SAVE_PREV_SCOLL_API(broadcast);
|
MPI_SAVE_PREV_SCOLL_API(broadcast);
|
||||||
MPI_SAVE_PREV_SCOLL_API(reduce);
|
MPI_SAVE_PREV_SCOLL_API(reduce);
|
||||||
MPI_SAVE_PREV_SCOLL_API(collect);
|
MPI_SAVE_PREV_SCOLL_API(collect);
|
||||||
|
MPI_SAVE_PREV_SCOLL_API(alltoall);
|
||||||
return OSHMEM_SUCCESS;
|
return OSHMEM_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -173,6 +176,7 @@ mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
|
|||||||
mpi_module->super.scoll_broadcast = mca_scoll_mpi_broadcast;
|
mpi_module->super.scoll_broadcast = mca_scoll_mpi_broadcast;
|
||||||
mpi_module->super.scoll_reduce = mca_scoll_mpi_reduce;
|
mpi_module->super.scoll_reduce = mca_scoll_mpi_reduce;
|
||||||
mpi_module->super.scoll_collect = mca_scoll_mpi_collect;
|
mpi_module->super.scoll_collect = mca_scoll_mpi_collect;
|
||||||
|
mpi_module->super.scoll_alltoall = NULL;
|
||||||
|
|
||||||
*priority = cm->mpi_priority;
|
*priority = cm->mpi_priority;
|
||||||
module = &mpi_module->super;
|
module = &mpi_module->super;
|
||||||
|
@ -89,7 +89,6 @@ typedef struct mca_scoll_base_component_1_0_0_t mca_scoll_base_component_t;
|
|||||||
typedef int
|
typedef int
|
||||||
(*mca_scoll_base_module_enable_1_0_0_fn_t)(struct mca_scoll_base_module_1_0_0_t* module,
|
(*mca_scoll_base_module_enable_1_0_0_fn_t)(struct mca_scoll_base_module_1_0_0_t* module,
|
||||||
struct oshmem_group_t *comm);
|
struct oshmem_group_t *comm);
|
||||||
typedef int (*mca_scoll_base_module_ft_event_fn_t)(int state);
|
|
||||||
|
|
||||||
#define SCOLL_DEFAULT_ALG (-1)
|
#define SCOLL_DEFAULT_ALG (-1)
|
||||||
|
|
||||||
@ -139,6 +138,14 @@ typedef int (*mca_scoll_base_module_reduce_fn_t)(struct oshmem_group_t *group,
|
|||||||
long *pSync,
|
long *pSync,
|
||||||
void *pWrk,
|
void *pWrk,
|
||||||
int alg);
|
int alg);
|
||||||
|
/*
 * Function pointer type for a module's alltoall implementation.
 *
 * group        active set the collective is executed on
 * target       destination buffer
 * source       source buffer
 * dst, sst     strides for the target and the source buffer
 * nelems       number of elements
 * element_size size of a single element in bytes
 * pSync        synchronization work array
 * alg          algorithm selector (callers in this change pass
 *              SCOLL_DEFAULT_ALG)
 *
 * Returns an OSHMEM status code (OSHMEM_SUCCESS on success).
 */
typedef int (*mca_scoll_base_module_alltoall_fn_t)(struct oshmem_group_t *group,
|
||||||
|
void *target,
|
||||||
|
const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nelems,
|
||||||
|
size_t element_size,
|
||||||
|
long *pSync,
|
||||||
|
int alg);
|
||||||
|
|
||||||
struct mca_scoll_base_module_1_0_0_t {
|
struct mca_scoll_base_module_1_0_0_t {
|
||||||
/** Collective modules all inherit from opal_object */
|
/** Collective modules all inherit from opal_object */
|
||||||
@ -149,13 +156,14 @@ struct mca_scoll_base_module_1_0_0_t {
|
|||||||
mca_scoll_base_module_broadcast_fn_t scoll_broadcast;
|
mca_scoll_base_module_broadcast_fn_t scoll_broadcast;
|
||||||
mca_scoll_base_module_collect_fn_t scoll_collect;
|
mca_scoll_base_module_collect_fn_t scoll_collect;
|
||||||
mca_scoll_base_module_reduce_fn_t scoll_reduce;
|
mca_scoll_base_module_reduce_fn_t scoll_reduce;
|
||||||
|
mca_scoll_base_module_alltoall_fn_t scoll_alltoall;
|
||||||
mca_scoll_base_module_enable_1_0_0_fn_t scoll_module_enable;
|
mca_scoll_base_module_enable_1_0_0_fn_t scoll_module_enable;
|
||||||
};
|
};
|
||||||
typedef struct mca_scoll_base_module_1_0_0_t mca_scoll_base_module_1_0_0_t;
|
typedef struct mca_scoll_base_module_1_0_0_t mca_scoll_base_module_1_0_0_t;
|
||||||
|
|
||||||
/** Per guidence in mca.h, use the unversioned struct name if you just
|
/** Per guidance in mca.h, use the unversioned struct name if you just
|
||||||
want to always keep up with the most recent version of the
|
want to always keep up with the most recent version of the
|
||||||
interace. */
|
interface. */
|
||||||
typedef struct mca_scoll_base_module_1_0_0_t mca_scoll_base_module_t;
|
typedef struct mca_scoll_base_module_1_0_0_t mca_scoll_base_module_t;
|
||||||
OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(mca_scoll_base_module_t);
|
OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(mca_scoll_base_module_t);
|
||||||
|
|
||||||
@ -171,7 +179,7 @@ OSHMEM_DECLSPEC OBJ_CLASS_DECLARATION(mca_scoll_base_module_t);
|
|||||||
/*
|
/*
|
||||||
* Collectives group cache structure
|
* Collectives group cache structure
|
||||||
*
|
*
|
||||||
* Collectives gorup cache structure, used to find functions to
|
* Collectives group cache structure, used to find functions to
|
||||||
* implement collective algorithms and their associated modules.
|
* implement collective algorithms and their associated modules.
|
||||||
*/
|
*/
|
||||||
struct mca_scoll_base_group_scoll_t {
|
struct mca_scoll_base_group_scoll_t {
|
||||||
@ -183,6 +191,8 @@ struct mca_scoll_base_group_scoll_t {
|
|||||||
mca_scoll_base_module_1_0_0_t *scoll_collect_module;
|
mca_scoll_base_module_1_0_0_t *scoll_collect_module;
|
||||||
mca_scoll_base_module_reduce_fn_t scoll_reduce;
|
mca_scoll_base_module_reduce_fn_t scoll_reduce;
|
||||||
mca_scoll_base_module_1_0_0_t *scoll_reduce_module;
|
mca_scoll_base_module_1_0_0_t *scoll_reduce_module;
|
||||||
|
mca_scoll_base_module_alltoall_fn_t scoll_alltoall;
|
||||||
|
mca_scoll_base_module_1_0_0_t *scoll_alltoall_module;
|
||||||
};
|
};
|
||||||
typedef struct mca_scoll_base_group_scoll_t mca_scoll_base_group_scoll_t;
|
typedef struct mca_scoll_base_group_scoll_t mca_scoll_base_group_scoll_t;
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@ OSHMEM_API_SOURCES = \
|
|||||||
shmem_get.c \
|
shmem_get.c \
|
||||||
shmem_broadcast.c \
|
shmem_broadcast.c \
|
||||||
shmem_collect.c \
|
shmem_collect.c \
|
||||||
|
shmem_alltoall.c \
|
||||||
shmem_ptr.c \
|
shmem_ptr.c \
|
||||||
shmem_pe_accessible.c \
|
shmem_pe_accessible.c \
|
||||||
shmem_addr_accessible.c \
|
shmem_addr_accessible.c \
|
||||||
|
@ -41,6 +41,7 @@ OSHMEM_API_SOURCES = \
|
|||||||
pshmem_put.c \
|
pshmem_put.c \
|
||||||
pshmem_g.c \
|
pshmem_g.c \
|
||||||
pshmem_get.c \
|
pshmem_get.c \
|
||||||
|
pshmem_alltoall.c \
|
||||||
pshmem_broadcast.c \
|
pshmem_broadcast.c \
|
||||||
pshmem_collect.c \
|
pshmem_collect.c \
|
||||||
pshmem_ptr.c \
|
pshmem_ptr.c \
|
||||||
|
@ -360,6 +360,14 @@
|
|||||||
#define shmemx_int32_prod_to_all pshmemx_int32_prod_to_all
|
#define shmemx_int32_prod_to_all pshmemx_int32_prod_to_all
|
||||||
#define shmemx_int64_prod_to_all pshmemx_int64_prod_to_all
|
#define shmemx_int64_prod_to_all pshmemx_int64_prod_to_all
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Alltoall routines
|
||||||
|
*/
|
||||||
|
#define shmem_alltoall32 pshmem_alltoall32
|
||||||
|
#define shmem_alltoall64 pshmem_alltoall64
|
||||||
|
#define shmem_alltoalls32 pshmem_alltoalls32
|
||||||
|
#define shmem_alltoalls64 pshmem_alltoalls64
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Platform specific cache management routines
|
* Platform specific cache management routines
|
||||||
*/
|
*/
|
||||||
|
137
oshmem/shmem/c/shmem_alltoall.c
Обычный файл
137
oshmem/shmem/c/shmem_alltoall.c
Обычный файл
@ -0,0 +1,137 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016 Mellanox Technologies, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
#include "oshmem_config.h"
|
||||||
|
|
||||||
|
#include "oshmem/constants.h"
|
||||||
|
#include "oshmem/include/shmem.h"
|
||||||
|
|
||||||
|
#include "orte/mca/grpcomm/grpcomm.h"
|
||||||
|
|
||||||
|
#include "oshmem/runtime/runtime.h"
|
||||||
|
|
||||||
|
#include "oshmem/mca/scoll/scoll.h"
|
||||||
|
|
||||||
|
#include "oshmem/proc/proc.h"
|
||||||
|
#include "oshmem/proc/proc_group_cache.h"
|
||||||
|
|
||||||
|
static void _shmem_alltoall(void *target,
|
||||||
|
const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nelems,
|
||||||
|
size_t element_size,
|
||||||
|
int PE_start,
|
||||||
|
int logPE_stride,
|
||||||
|
int PE_size,
|
||||||
|
long *pSync);
|
||||||
|
|
||||||
|
/*
 * Generates a public contiguous alltoall entry point named shmem<name>.
 * The generated function runs the runtime init / address checks and then
 * forwards to _shmem_alltoall() with both strides fixed to 1 and the
 * element width given by element_size.
 */
#define SHMEM_TYPE_ALLTOALL(name, element_size)                          \
    void shmem##name(void *target, const void *source, size_t nelems,   \
                     int PE_start, int logPE_stride, int PE_size,       \
                     long *pSync)                                       \
    {                                                                   \
        RUNTIME_CHECK_INIT();                                           \
        RUNTIME_CHECK_ADDR(target);                                     \
        RUNTIME_CHECK_ADDR(source);                                     \
                                                                        \
        _shmem_alltoall(target, source,                                 \
                        1, 1,                                           \
                        nelems, element_size,                           \
                        PE_start, logPE_stride, PE_size,                \
                        pSync);                                         \
    }
|
||||||
|
|
||||||
|
/*
 * Generates a public strided alltoall entry point named shmem<name>.
 * Same as the contiguous variant, except that the caller-supplied target
 * stride (dst) and source stride (sst) are passed through to
 * _shmem_alltoall() unchanged.
 */
#define SHMEM_TYPE_ALLTOALLS(name, element_size)                         \
    void shmem##name(void *target, const void *source,                  \
                     ptrdiff_t dst, ptrdiff_t sst, size_t nelems,       \
                     int PE_start, int logPE_stride, int PE_size,       \
                     long *pSync)                                       \
    {                                                                   \
        RUNTIME_CHECK_INIT();                                           \
        RUNTIME_CHECK_ADDR(target);                                     \
        RUNTIME_CHECK_ADDR(source);                                     \
                                                                        \
        _shmem_alltoall(target, source,                                 \
                        dst, sst,                                       \
                        nelems, element_size,                           \
                        PE_start, logPE_stride, PE_size,                \
                        pSync);                                         \
    }
|
||||||
|
|
||||||
|
static void _shmem_alltoall(void *target,
|
||||||
|
const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nelems,
|
||||||
|
size_t element_size,
|
||||||
|
int PE_start,
|
||||||
|
int logPE_stride,
|
||||||
|
int PE_size,
|
||||||
|
long *pSync)
|
||||||
|
{
|
||||||
|
int rc = OSHMEM_SUCCESS;
|
||||||
|
oshmem_group_t* group = NULL;
|
||||||
|
|
||||||
|
if ((0 <= PE_start) && (0 <= logPE_stride)) {
|
||||||
|
/* Create group basing PE_start, logPE_stride and PE_size */
|
||||||
|
#if OSHMEM_GROUP_CACHE_ENABLED == 0
|
||||||
|
group = oshmem_proc_group_create(PE_start, (1 << logPE_stride), PE_size);
|
||||||
|
if (!group)
|
||||||
|
rc = OSHMEM_ERROR;
|
||||||
|
#else
|
||||||
|
group = find_group_in_cache(PE_start, logPE_stride, PE_size);
|
||||||
|
if (!group) {
|
||||||
|
group = oshmem_proc_group_create(PE_start,
|
||||||
|
(1 << logPE_stride),
|
||||||
|
PE_size);
|
||||||
|
if (!group) {
|
||||||
|
rc = OSHMEM_ERROR;
|
||||||
|
} else {
|
||||||
|
cache_group(group, PE_start, logPE_stride, PE_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* OSHMEM_GROUP_CACHE_ENABLED */
|
||||||
|
|
||||||
|
/* Collective operation call */
|
||||||
|
if (rc == OSHMEM_SUCCESS) {
|
||||||
|
/* Call collective alltoall operation */
|
||||||
|
rc = group->g_scoll.scoll_alltoall(group,
|
||||||
|
target,
|
||||||
|
source,
|
||||||
|
dst,
|
||||||
|
sst,
|
||||||
|
nelems,
|
||||||
|
element_size,
|
||||||
|
pSync,
|
||||||
|
SCOLL_DEFAULT_ALG);
|
||||||
|
}
|
||||||
|
#if OSHMEM_GROUP_CACHE_ENABLED == 0
|
||||||
|
if ( rc == OSHMEM_SUCCESS ) {
|
||||||
|
oshmem_proc_group_destroy(group);
|
||||||
|
}
|
||||||
|
#endif /* OSHMEM_GROUP_CACHE_ENABLED */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if OSHMEM_PROFILING
|
||||||
|
#include "oshmem/include/pshmem.h"
|
||||||
|
#pragma weak shmem_alltoall32 = pshmem_alltoall32
|
||||||
|
#pragma weak shmem_alltoall64 = pshmem_alltoall64
|
||||||
|
#pragma weak shmem_alltoalls32 = pshmem_alltoalls32
|
||||||
|
#pragma weak shmem_alltoalls64 = pshmem_alltoalls64
|
||||||
|
#include "oshmem/shmem/c/profile/defines.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Instantiate the public entry points.  Each macro pastes "shmem" in front
 * of its first argument to form the function name; the second argument is
 * the element width in bytes that is forwarded to _shmem_alltoall().
 * The ALLTOALLS variants additionally take the dst / sst stride arguments.
 */
SHMEM_TYPE_ALLTOALL(_alltoall32, sizeof(uint32_t))
|
||||||
|
SHMEM_TYPE_ALLTOALL(_alltoall64, sizeof(uint64_t))
|
||||||
|
SHMEM_TYPE_ALLTOALLS(_alltoalls32, sizeof(uint32_t))
|
||||||
|
SHMEM_TYPE_ALLTOALLS(_alltoalls64, sizeof(uint64_t))
|
@ -122,6 +122,7 @@ liboshmem_fortran_la_SOURCES += \
|
|||||||
shmem_prod_to_all_f.c \
|
shmem_prod_to_all_f.c \
|
||||||
shmem_collect_f.c \
|
shmem_collect_f.c \
|
||||||
shmem_broadcast_f.c \
|
shmem_broadcast_f.c \
|
||||||
|
shmem_alltoall_f.c \
|
||||||
shmem_lock_f.c \
|
shmem_lock_f.c \
|
||||||
shmem_cache_f.c \
|
shmem_cache_f.c \
|
||||||
shmem_int4_inc_f.c \
|
shmem_int4_inc_f.c \
|
||||||
|
@ -114,6 +114,7 @@ nodist_liboshmem_fortran_pshmem_la_SOURCES = \
|
|||||||
pshmem_prod_to_all_f.c \
|
pshmem_prod_to_all_f.c \
|
||||||
pshmem_collect_f.c \
|
pshmem_collect_f.c \
|
||||||
pshmem_broadcast_f.c \
|
pshmem_broadcast_f.c \
|
||||||
|
pshmem_alltoall_f.c \
|
||||||
pshmem_lock_f.c \
|
pshmem_lock_f.c \
|
||||||
pshmem_cache_f.c \
|
pshmem_cache_f.c \
|
||||||
pshmem_int4_inc_f.c \
|
pshmem_int4_inc_f.c \
|
||||||
|
@ -86,6 +86,22 @@
|
|||||||
#define shmem_broadcast64_ pshmem_broadcast64_
|
#define shmem_broadcast64_ pshmem_broadcast64_
|
||||||
#define shmem_broadcast64__ pshmem_broadcast64__
|
#define shmem_broadcast64__ pshmem_broadcast64__
|
||||||
|
|
||||||
|
#define SHMEM_ALLTOALL32 PSHMEM_ALLTOALL32
|
||||||
|
#define shmem_alltoall32_ pshmem_alltoall32_
|
||||||
|
#define shmem_alltoall32__ pshmem_alltoall32__
|
||||||
|
|
||||||
|
#define SHMEM_ALLTOALL64 PSHMEM_ALLTOALL64
|
||||||
|
#define shmem_alltoall64_ pshmem_alltoall64_
|
||||||
|
#define shmem_alltoall64__ pshmem_alltoall64__
|
||||||
|
|
||||||
|
#define SHMEM_ALLTOALLS32 PSHMEM_ALLTOALLS32
|
||||||
|
#define shmem_alltoalls32_ pshmem_alltoalls32_
|
||||||
|
#define shmem_alltoalls32__ pshmem_alltoalls32__
|
||||||
|
|
||||||
|
#define SHMEM_ALLTOALLS64 PSHMEM_ALLTOALLS64
|
||||||
|
#define shmem_alltoalls64_ pshmem_alltoalls64_
|
||||||
|
#define shmem_alltoalls64__ pshmem_alltoalls64__
|
||||||
|
|
||||||
#define SHMEM_SET_CACHE_INV PSHMEM_SET_CACHE_INV
|
#define SHMEM_SET_CACHE_INV PSHMEM_SET_CACHE_INV
|
||||||
#define shmem_set_cache_inv_ pshmem_set_cache_inv_
|
#define shmem_set_cache_inv_ pshmem_set_cache_inv_
|
||||||
#define shmem_set_cache_inv__ pshmem_set_cache_inv__
|
#define shmem_set_cache_inv__ pshmem_set_cache_inv__
|
||||||
|
@ -185,6 +185,10 @@ PN (void, pshmem_broadcast4, PSHMEM_BROADCAST4, (FORTRAN_POINTER_T target, FORTR
|
|||||||
PN (void, pshmem_broadcast8, PSHMEM_BROADCAST8, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
PN (void, pshmem_broadcast8, PSHMEM_BROADCAST8, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
PN (void, pshmem_broadcast32, PSHMEM_BROADCAST32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
PN (void, pshmem_broadcast32, PSHMEM_BROADCAST32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
PN (void, pshmem_broadcast64, PSHMEM_BROADCAST64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
PN (void, pshmem_broadcast64, PSHMEM_BROADCAST64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
|
PN (void, pshmem_alltoall32, PSHMEM_ALLTOALL32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
|
PN (void, pshmem_alltoall64, PSHMEM_ALLTOALL64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
|
PN (void, pshmem_alltoalls32, PSHMEM_ALLTOALLS32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
|
PN (void, pshmem_alltoalls64, PSHMEM_ALLTOALLS64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
PN (void, pshmem_set_lock, PSHMEM_SET_LOCK, (FORTRAN_POINTER_T lock));
|
PN (void, pshmem_set_lock, PSHMEM_SET_LOCK, (FORTRAN_POINTER_T lock));
|
||||||
PN (void, pshmem_clear_lock, PSHMEM_CLEAR_LOCK, (FORTRAN_POINTER_T lock));
|
PN (void, pshmem_clear_lock, PSHMEM_CLEAR_LOCK, (FORTRAN_POINTER_T lock));
|
||||||
PN (MPI_Fint, pshmem_test_lock, PSHMEM_TEST_LOCK, (FORTRAN_POINTER_T lock));
|
PN (MPI_Fint, pshmem_test_lock, PSHMEM_TEST_LOCK, (FORTRAN_POINTER_T lock));
|
||||||
|
@ -189,6 +189,10 @@ PN (void, shmem_broadcast4, SHMEM_BROADCAST4, (FORTRAN_POINTER_T target, FORTRAN
|
|||||||
PN (void, shmem_broadcast8, SHMEM_BROADCAST8, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
PN (void, shmem_broadcast8, SHMEM_BROADCAST8, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
PN (void, shmem_broadcast32, SHMEM_BROADCAST32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
PN (void, shmem_broadcast32, SHMEM_BROADCAST32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
PN (void, shmem_broadcast64, SHMEM_BROADCAST64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
PN (void, shmem_broadcast64, SHMEM_BROADCAST64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_root, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
|
PN (void, shmem_alltoall32, SHMEM_ALLTOALL32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
|
PN (void, shmem_alltoall64, SHMEM_ALLTOALL64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
|
PN (void, shmem_alltoalls32, SHMEM_ALLTOALLS32, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
|
PN (void, shmem_alltoalls64, SHMEM_ALLTOALLS64, (FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync));
|
||||||
PN (void, shmem_set_lock, SHMEM_SET_LOCK, (FORTRAN_POINTER_T lock));
|
PN (void, shmem_set_lock, SHMEM_SET_LOCK, (FORTRAN_POINTER_T lock));
|
||||||
PN (void, shmem_clear_lock, SHMEM_CLEAR_LOCK, (FORTRAN_POINTER_T lock));
|
PN (void, shmem_clear_lock, SHMEM_CLEAR_LOCK, (FORTRAN_POINTER_T lock));
|
||||||
PN (MPI_Fint, shmem_test_lock, SHMEM_TEST_LOCK, (FORTRAN_POINTER_T lock));
|
PN (MPI_Fint, shmem_test_lock, SHMEM_TEST_LOCK, (FORTRAN_POINTER_T lock));
|
||||||
|
191
oshmem/shmem/fortran/shmem_alltoall_f.c
Обычный файл
191
oshmem/shmem/fortran/shmem_alltoall_f.c
Обычный файл
@ -0,0 +1,191 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2013-2016 Mellanox Technologies, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||||
|
* $COPYRIGHT$
|
||||||
|
*
|
||||||
|
* Additional copyrights may follow
|
||||||
|
*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "oshmem_config.h"
|
||||||
|
#include "oshmem/shmem/fortran/bindings.h"
|
||||||
|
#include "oshmem/include/shmem.h"
|
||||||
|
#include "oshmem/constants.h"
|
||||||
|
#include "oshmem/mca/scoll/scoll.h"
|
||||||
|
#include "oshmem/proc/proc.h"
|
||||||
|
#include "oshmem/proc/proc_group_cache.h"
|
||||||
|
#include "oshmem/op/op.h"
|
||||||
|
|
||||||
|
#if OSHMEM_PROFILING
|
||||||
|
#include "oshmem/shmem/fortran/profile/pbindings.h"
|
||||||
|
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALL32, shmem_alltoall32)
|
||||||
|
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALL64, shmem_alltoall64)
|
||||||
|
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALLS32, shmem_alltoalls32)
|
||||||
|
SHMEM_GENERATE_WEAK_BINDINGS(SHMEM_ALLTOALLS64, shmem_alltoalls64)
|
||||||
|
#include "oshmem/shmem/fortran/profile/defines.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||||
|
SHMEM_ALLTOALL32,
|
||||||
|
shmem_alltoall32_,
|
||||||
|
shmem_alltoall32__,
|
||||||
|
shmem_alltoall32_f,
|
||||||
|
(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync),
|
||||||
|
(target, source, nlong, PE_start, logPE_stride, PE_size, pSync))
|
||||||
|
|
||||||
|
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||||
|
SHMEM_ALLTOALL64,
|
||||||
|
shmem_alltoall64_,
|
||||||
|
shmem_alltoall64__,
|
||||||
|
shmem_alltoall64_f,
|
||||||
|
(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync),
|
||||||
|
(target, source, nlong, PE_start, logPE_stride, PE_size, pSync))
|
||||||
|
|
||||||
|
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||||
|
SHMEM_ALLTOALLS32,
|
||||||
|
shmem_alltoalls32_,
|
||||||
|
shmem_alltoalls32__,
|
||||||
|
shmem_alltoalls32_f,
|
||||||
|
(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync),
|
||||||
|
(target, source, dst, sst, nlong, PE_start, logPE_stride, PE_size, pSync))
|
||||||
|
|
||||||
|
SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||||
|
SHMEM_ALLTOALLS64,
|
||||||
|
shmem_alltoalls64_,
|
||||||
|
shmem_alltoalls64__,
|
||||||
|
shmem_alltoalls64_f,
|
||||||
|
(FORTRAN_POINTER_T target, FORTRAN_POINTER_T source, MPI_Fint *dst, MPI_Fint *sst, MPI_Fint *nlong, MPI_Fint *PE_start, MPI_Fint * logPE_stride, MPI_Fint *PE_size, FORTRAN_POINTER_T pSync),
|
||||||
|
(target, source, dst, sst, nlong, PE_start, logPE_stride, PE_size, pSync))
|
||||||
|
|
||||||
|
/*
 * SHMEM_ALLTOALL(F_NAME, T_NAME, OSHMEM_GROUP_CACHE_ENABLED)
 *
 * Expands to the definition of the Fortran entry point F_NAME that performs
 * a contiguous all-to-all exchange (both strides are passed as 1).
 *
 *   F_NAME                      name of the generated function
 *   T_NAME                      oshmem_op_t pointer; only its dt_size field is
 *                               read, to supply the element size in bytes
 *   OSHMEM_GROUP_CACHE_ENABLED  0: create the group for this call and destroy
 *                               it afterwards; otherwise: look the group up in
 *                               the cache and create + cache it on a miss
 *
 * The generated function returns void, so the status of the group creation
 * and of scoll_alltoall() is not reported to the caller.
 */
#define SHMEM_ALLTOALL(F_NAME, T_NAME, OSHMEM_GROUP_CACHE_ENABLED) void F_NAME(FORTRAN_POINTER_T target, \
        FORTRAN_POINTER_T source, \
        MPI_Fint *nlong, \
        MPI_Fint *PE_start, \
        MPI_Fint *logPE_stride, \
        MPI_Fint *PE_size, \
        FORTRAN_POINTER_T pSync) \
{ \
    int rc = OSHMEM_SUCCESS; \
    oshmem_group_t* group = NULL; \
    { \
        /* Create group basing PE_start, logPE_stride and PE_size */ \
        if (OSHMEM_GROUP_CACHE_ENABLED == 0) \
        { \
            group = oshmem_proc_group_create(OMPI_FINT_2_INT(*PE_start), \
                                             (1 << OMPI_FINT_2_INT(*logPE_stride)), \
                                             OMPI_FINT_2_INT(*PE_size)); \
            if (!group) \
                rc = OSHMEM_ERROR; \
        } \
        else \
        { \
            group = find_group_in_cache(OMPI_FINT_2_INT(*PE_start), \
                                        OMPI_FINT_2_INT(*logPE_stride), \
                                        OMPI_FINT_2_INT(*PE_size)); \
            if (!group) \
            { \
                group = oshmem_proc_group_create(OMPI_FINT_2_INT(*PE_start), \
                                                 (1 << OMPI_FINT_2_INT(*logPE_stride)), \
                                                 OMPI_FINT_2_INT(*PE_size)); \
                if (!group) \
                { \
                    rc = OSHMEM_ERROR; \
                } \
                else \
                { \
                    /* Cache only a group that was really created; a failed */ \
                    /* creation must not leave a NULL entry in the cache.   */ \
                    cache_group(group, OMPI_FINT_2_INT(*PE_start), \
                                OMPI_FINT_2_INT(*logPE_stride), \
                                OMPI_FINT_2_INT(*PE_size)); \
                } \
            } \
        } /* OSHMEM_GROUP_CACHE_ENABLED */ \
        /* Collective operation call */ \
        if ( rc == OSHMEM_SUCCESS ) \
        { \
            oshmem_op_t* op = T_NAME; \
            \
            /* Call collective alltoall operation (contiguous: dst = sst = 1) */ \
            rc = group->g_scoll.scoll_alltoall( group, \
                                                FPTR_2_VOID_PTR(target), \
                                                FPTR_2_VOID_PTR(source), \
                                                1, \
                                                1, \
                                                OMPI_FINT_2_INT(*nlong), \
                                                op->dt_size, \
                                                FPTR_2_VOID_PTR(pSync), SCOLL_DEFAULT_ALG ); \
        } \
        /* Without the cache the group is owned by this call: release it */ \
        if (OSHMEM_GROUP_CACHE_ENABLED == 0) \
        { \
            if ( group ) \
            { \
                oshmem_proc_group_destroy(group); \
            } \
        } /* OSHMEM_GROUP_CACHE_ENABLED */ \
    } \
}
|
||||||
|
|
||||||
|
/*
 * SHMEM_ALLTOALLS(F_NAME, T_NAME, OSHMEM_GROUP_CACHE_ENABLED)
 *
 * Expands to the definition of the Fortran entry point F_NAME that performs
 * a strided all-to-all exchange; the caller-supplied dst and sst strides are
 * forwarded to scoll_alltoall().
 *
 *   F_NAME                      name of the generated function
 *   T_NAME                      oshmem_op_t pointer; only its dt_size field is
 *                               read, to supply the element size in bytes
 *   OSHMEM_GROUP_CACHE_ENABLED  0: create the group for this call and destroy
 *                               it afterwards; otherwise: look the group up in
 *                               the cache and create + cache it on a miss
 *
 * The generated function returns void, so the status of the group creation
 * and of scoll_alltoall() is not reported to the caller.
 */
#define SHMEM_ALLTOALLS(F_NAME, T_NAME, OSHMEM_GROUP_CACHE_ENABLED) void F_NAME(FORTRAN_POINTER_T target, \
        FORTRAN_POINTER_T source, \
        MPI_Fint *dst, \
        MPI_Fint *sst, \
        MPI_Fint *nlong, \
        MPI_Fint *PE_start, \
        MPI_Fint *logPE_stride, \
        MPI_Fint *PE_size, \
        FORTRAN_POINTER_T pSync) \
{ \
    int rc = OSHMEM_SUCCESS; \
    oshmem_group_t* group = NULL; \
    { \
        /* Create group basing PE_start, logPE_stride and PE_size */ \
        if (OSHMEM_GROUP_CACHE_ENABLED == 0) \
        { \
            group = oshmem_proc_group_create(OMPI_FINT_2_INT(*PE_start), \
                                             (1 << OMPI_FINT_2_INT(*logPE_stride)), \
                                             OMPI_FINT_2_INT(*PE_size)); \
            if (!group) \
                rc = OSHMEM_ERROR; \
        } \
        else \
        { \
            group = find_group_in_cache(OMPI_FINT_2_INT(*PE_start), \
                                        OMPI_FINT_2_INT(*logPE_stride), \
                                        OMPI_FINT_2_INT(*PE_size)); \
            if (!group) \
            { \
                group = oshmem_proc_group_create(OMPI_FINT_2_INT(*PE_start), \
                                                 (1 << OMPI_FINT_2_INT(*logPE_stride)), \
                                                 OMPI_FINT_2_INT(*PE_size)); \
                if (!group) \
                { \
                    rc = OSHMEM_ERROR; \
                } \
                else \
                { \
                    /* Cache only a group that was really created; a failed */ \
                    /* creation must not leave a NULL entry in the cache.   */ \
                    cache_group(group, OMPI_FINT_2_INT(*PE_start), \
                                OMPI_FINT_2_INT(*logPE_stride), \
                                OMPI_FINT_2_INT(*PE_size)); \
                } \
            } \
        } /* OSHMEM_GROUP_CACHE_ENABLED */ \
        /* Collective operation call */ \
        if ( rc == OSHMEM_SUCCESS ) \
        { \
            oshmem_op_t* op = T_NAME; \
            \
            /* Call collective alltoall operation with the caller's strides */ \
            rc = group->g_scoll.scoll_alltoall( group, \
                                                FPTR_2_VOID_PTR(target), \
                                                FPTR_2_VOID_PTR(source), \
                                                OMPI_FINT_2_INT(*dst), \
                                                OMPI_FINT_2_INT(*sst), \
                                                OMPI_FINT_2_INT(*nlong), \
                                                op->dt_size, \
                                                FPTR_2_VOID_PTR(pSync), SCOLL_DEFAULT_ALG ); \
        } \
        /* Without the cache the group is owned by this call: release it */ \
        if (OSHMEM_GROUP_CACHE_ENABLED == 0) \
        { \
            if ( group ) \
            { \
                oshmem_proc_group_destroy(group); \
            } \
        } /* OSHMEM_GROUP_CACHE_ENABLED */ \
    } \
}
|
||||||
|
|
||||||
|
SHMEM_ALLTOALL(shmem_alltoall32_f, oshmem_op_prod_fint4, OSHMEM_GROUP_CACHE_ENABLED)
|
||||||
|
SHMEM_ALLTOALL(shmem_alltoall64_f, oshmem_op_prod_fint8, OSHMEM_GROUP_CACHE_ENABLED)
|
||||||
|
SHMEM_ALLTOALLS(shmem_alltoalls32_f, oshmem_op_prod_fint4, OSHMEM_GROUP_CACHE_ENABLED)
|
||||||
|
SHMEM_ALLTOALLS(shmem_alltoalls64_f, oshmem_op_prod_fint8, OSHMEM_GROUP_CACHE_ENABLED)
|
@ -132,6 +132,10 @@ shmem_api_man_pages = \
|
|||||||
shmem/man/man3/shmem_quiet.3 \
|
shmem/man/man3/shmem_quiet.3 \
|
||||||
shmem/man/man3/shmem_broadcast32.3 \
|
shmem/man/man3/shmem_broadcast32.3 \
|
||||||
shmem/man/man3/shmem_broadcast64.3 \
|
shmem/man/man3/shmem_broadcast64.3 \
|
||||||
|
shmem/man/man3/shmem_alltoall32.3 \
|
||||||
|
shmem/man/man3/shmem_alltoall64.3 \
|
||||||
|
shmem/man/man3/shmem_alltoalls32.3 \
|
||||||
|
shmem/man/man3/shmem_alltoalls64.3 \
|
||||||
shmem/man/man3/shmem_collect32.3 \
|
shmem/man/man3/shmem_collect32.3 \
|
||||||
shmem/man/man3/shmem_collect64.3 \
|
shmem/man/man3/shmem_collect64.3 \
|
||||||
shmem/man/man3/shmem_fcollect32.3 \
|
shmem/man/man3/shmem_fcollect32.3 \
|
||||||
|
226
oshmem/shmem/man/man3/shmem_alltoall32.3in
Обычный файл
226
oshmem/shmem/man/man3/shmem_alltoall32.3in
Обычный файл
@ -0,0 +1,226 @@
|
|||||||
|
.\" -*- nroff -*-
|
||||||
|
.\" Copyright (c) 2016 Mellanox Technologies, Inc.
|
||||||
|
.\" $COPYRIGHT$
|
||||||
|
.de Vb
|
||||||
|
.ft CW
|
||||||
|
.nf
|
||||||
|
..
|
||||||
|
.de Ve
|
||||||
|
.ft R
|
||||||
|
|
||||||
|
.fi
|
||||||
|
..
|
||||||
|
.TH "SHMEM\\_ALLTOALL" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
|
||||||
|
.SH NAME
|
||||||
|
|
||||||
|
\fIshmem_alltoall32\fP(3),
|
||||||
|
\fIshmem_alltoall64\fP(3),
|
||||||
|
\fIshmem_alltoalls32\fP(3),
|
||||||
|
\fIshmem_alltoalls64\fP(3)
|
||||||
|
\- collective routine where each PE exchanges a fixed amount of data with all
|
||||||
|
other PEs in the Active set
|
||||||
|
.SH SYNOPSIS
|
||||||
|
|
||||||
|
C or C++:
|
||||||
|
.Vb
|
||||||
|
#include <mpp/shmem.h>
|
||||||
|
|
||||||
|
void shmem_alltoall32(void *target, const void *source,
|
||||||
|
size_t nelems, int PE_start, int logPE_stride,
|
||||||
|
int PE_size, long *pSync);
|
||||||
|
|
||||||
|
void shmem_alltoall64(void *target, const void *source,
|
||||||
|
size_t nelems, int PE_start, int logPE_stride,
|
||||||
|
int PE_size, long *pSync);
|
||||||
|
|
||||||
|
void shmem_alltoalls32(void *target, const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nelems, int PE_start, int logPE_stride,
|
||||||
|
int PE_size, long *pSync);
|
||||||
|
|
||||||
|
void shmem_alltoalls64(void *target, const void *source,
|
||||||
|
ptrdiff_t dst, ptrdiff_t sst,
|
||||||
|
size_t nelems, int PE_start, int logPE_stride,
|
||||||
|
int PE_size, long *pSync);
|
||||||
|
.Ve
|
||||||
|
Fortran:
|
||||||
|
.Vb
|
||||||
|
INCLUDE "mpp/shmem.fh"
|
||||||
|
|
||||||
|
INTEGER dst, sst, nelems, PE_start, logPE_stride, PE_size
|
||||||
|
INTEGER pSync(SHMEM_ALLTOALL_SYNC_SIZE)
|
||||||
|
|
||||||
|
CALL SHMEM_ALLTOALL32(target, source, nelems,
|
||||||
|
& PE_start, logPE_stride, PE_size, pSync)
|
||||||
|
|
||||||
|
CALL SHMEM_ALLTOALL64(target, source, nelems,
|
||||||
|
& PE_start, logPE_stride, PE_size, pSync)
|
||||||
|
|
||||||
|
CALL SHMEM_ALLTOALLS32(target, source, dst, sst, nelems,
|
||||||
|
& PE_start, logPE_stride, PE_size, pSync)
|
||||||
|
|
||||||
|
CALL SHMEM_ALLTOALLS64(target, source, dst, sst, nelems,
|
||||||
|
& PE_start, logPE_stride, PE_size, pSync)
|
||||||
|
.Ve
|
||||||
|
.SH DESCRIPTION
|
||||||
|
|
||||||
|
.PP
|
||||||
|
The shmem_alltoalls routines are collective routines. Each PE in the Active set exchanges nelems strided
|
||||||
|
data elements of size 32 bits (for shmem_alltoalls32) or 64 bits (for shmem_alltoalls64) with all other PEs
|
||||||
|
in the set. Both strides, dst and sst, must be greater than or equal to 1. The sst*jth block sent from PE i to
|
||||||
|
PE j is placed in the dst*ith block of the dest data object on PE j.
|
||||||
|
As with all OpenSHMEM collective routines, these routines assume that only PEs in the Active set call the
|
||||||
|
routine. If a PE not in the Active set calls an OpenSHMEM collective routine, undefined behavior results.
|
||||||
|
The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on all PEs in
|
||||||
|
the Active set. The same dest and source data objects, and the same pSync work array must be passed to all
|
||||||
|
PEs in the Active set.
|
||||||
|
Before any PE calls to a shmem_alltoalls routine, the following conditions must exist (synchronization via
|
||||||
|
a barrier or some other method is often needed to ensure this): The pSync array on all PEs in the Active set
|
||||||
|
is not still in use from a prior call to a shmem_alltoalls routine. The dest data object on all PEs in the
|
||||||
|
Active set is ready to accept the shmem_alltoalls data.
|
||||||
|
Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest symmetric data
|
||||||
|
object is completely updated and the data has been copied out of the source data object. The values in the
|
||||||
|
pSync array are restored to the original values.
|
||||||
|
.PP
|
||||||
|
The arguments are as follows:
|
||||||
|
.TP
|
||||||
|
A symmetric data object with one of the following data types:
|
||||||
|
.RS
|
||||||
|
.TP
|
||||||
|
\fBshmem_alltoall32\fP: Any noncharacter type that
|
||||||
|
has an element size of 32 bits. No Fortran derived types or C/C++ structures are allowed.
|
||||||
|
.TP
|
||||||
|
\fBshmem_alltoall64\fP: Any noncharacter type that has an element size
|
||||||
|
of 64 bits. No Fortran derived types or C/C++ structures are allowed.
|
||||||
|
.RE
|
||||||
|
.RS
|
||||||
|
.PP
|
||||||
|
.RE
|
||||||
|
target
|
||||||
|
A symmetric data object large enough to receive the combined total of
|
||||||
|
nelems elements from each PE in the Active set.
|
||||||
|
.TP
|
||||||
|
source
|
||||||
|
A symmetric data object that contains nelems elements of data for each
|
||||||
|
PE in the Active set, ordered according to destination PE.
|
||||||
|
.TP
|
||||||
|
dst
|
||||||
|
The stride between consecutive elements of the dest data object. The
|
||||||
|
stride is scaled by the element size. A value of 1 indicates contiguous
|
||||||
|
data. dst must be of type ptrdiff_t. If you are using Fortran, it must be
|
||||||
|
a default integer value.
|
||||||
|
.TP
|
||||||
|
sst
|
||||||
|
The stride between consecutive elements of the source data object. The
|
||||||
|
stride is scaled by the element size. A value of 1 indicates contiguous
|
||||||
|
data. sst must be of type ptrdiff_t. If you are using Fortran, it must be a
|
||||||
|
default integer value.
|
||||||
|
.TP
|
||||||
|
nelems
|
||||||
|
The number of elements to exchange for each PE. nelems must be of
|
||||||
|
type size_t for C/C++. If you are using Fortran, it must be a default
|
||||||
|
integer value.
|
||||||
|
.TP
|
||||||
|
PE_start
|
||||||
|
The lowest virtual PE number of the active set of PEs. PE_start must be of
|
||||||
|
type integer. If you are using Fortran, it must be a default integer value.
|
||||||
|
.TP
|
||||||
|
logPE_stride
|
||||||
|
The log (base 2) of the stride between consecutive virtual PE numbers in
|
||||||
|
the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
|
||||||
|
default integer value.
|
||||||
|
.TP
|
||||||
|
PE_size
|
||||||
|
The number of PEs in the active set. PE_size must be of type integer. If you
|
||||||
|
are using Fortran, it must be a default integer value.
|
||||||
|
.PP
|
||||||
|
.TP
|
||||||
|
pSync
|
||||||
|
A symmetric work array. In C/C++, pSync must be of type long and size
|
||||||
|
_SHMEM_ALLTOALL_SYNC_SIZE.
|
||||||
|
In Fortran, pSync must be of type integer and size SHMEM_ALLTOALL_SYNC_SIZE. Every
|
||||||
|
element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++)
|
||||||
|
or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
|
||||||
|
the alltoall routine.
|
||||||
|
.PP
|
||||||
|
Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest
|
||||||
|
symmetric data object is completely updated and the data has been copied out of the source
|
||||||
|
data object. The values in the pSync array are restored to the original values.
|
||||||
|
.PP
|
||||||
|
The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on
|
||||||
|
all PEs in the active set. The same target and source data objects and the same pSync work
|
||||||
|
array must be passed to all PEs in the active set.
|
||||||
|
.PP
|
||||||
|
Before any PE calls an alltoall routine, you must ensure that the following conditions exist
|
||||||
|
(synchronization via a barrier or some other method is often needed to ensure this): The
|
||||||
|
pSync array on all PEs in the active set is not still in use from a prior call to an alltoall
|
||||||
|
routine. The target array on all PEs in the active set is ready to accept the alltoall data.
|
||||||
|
.SH NOTES
|
||||||
|
|
||||||
|
The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&.
|
||||||
|
.PP
|
||||||
|
All SHMEM alltoall routines restore pSync to its original contents. Multiple calls to SHMEM
|
||||||
|
routines that use the same pSync array do not require that pSync be reinitialized after the
|
||||||
|
first call.
|
||||||
|
.PP
|
||||||
|
You must ensure that the pSync array is not being updated by any PE in the active set
|
||||||
|
while any of the PEs participates in processing of a SHMEM alltoall routine. Be careful to
|
||||||
|
avoid these situations: If the pSync array is initialized at run time, some type of
|
||||||
|
synchronization is needed to ensure that all PEs in the working set have initialized pSync
|
||||||
|
before any of them enter a SHMEM routine called with the pSync synchronization array. A
|
||||||
|
pSync array may be reused on a subsequent SHMEM alltoall routine only if none of the PEs
|
||||||
|
in the active set are still processing a prior SHMEM alltoall routine call that used the same
|
||||||
|
pSync array. In general, this can be ensured only by doing some type of synchronization.
|
||||||
|
However, in the special case of SHMEM routines being called with the same active set, you
|
||||||
|
can allocate two pSync arrays and alternate between them on successive calls.
|
||||||
|
.PP
|
||||||
|
.SH EXAMPLES
|
||||||
|
|
||||||
|
.PP
|
||||||
|
C/C++ example:
|
||||||
|
.Vb
|
||||||
|
#include <shmem.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
long pSync[SHMEM_ALLTOALL_SYNC_SIZE];
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
int64_t *source, *dest;
|
||||||
|
int i, count, pe;
|
||||||
|
shmem_init();
|
||||||
|
count = 2;
|
||||||
|
dest = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t));
|
||||||
|
source = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t));
|
||||||
|
/* assign source values */
|
||||||
|
for (pe=0; pe <shmem_n_pes(); pe++){
|
||||||
|
for (i=0; i<count; i++){
|
||||||
|
source[(pe*count)+i] = shmem_my_pe() + pe;
|
||||||
|
dest[(pe*count)+i] = 9999;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (i=0; i< SHMEM_ALLTOALL_SYNC_SIZE; i++) {
|
||||||
|
pSync[i] = SHMEM_SYNC_VALUE;
|
||||||
|
}
|
||||||
|
/* wait for all PEs to initialize pSync */
|
||||||
|
shmem_barrier_all();
|
||||||
|
/* alltoalls on all PES */
|
||||||
|
shmem_alltoalls64(dest, source, 1, 1, count, 0, 0, shmem_n_pes(), pSync);
|
||||||
|
/* verify results */
|
||||||
|
for (pe=0; pe<shmem_n_pes(); pe++) {
|
||||||
|
for (i=0; i<count; i++){
|
||||||
|
if (dest[(pe*count)+i] != shmem_my_pe() + pe) {
|
||||||
|
printf("[%d] ERROR: dest[%d]=%ld, should be %d\n",
|
||||||
|
shmem_my_pe(),(pe*count)+i,(long)dest[(pe*count)+i],
|
||||||
|
shmem_my_pe() + pe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
shmem_barrier_all();
|
||||||
|
shmem_free(dest);
|
||||||
|
shmem_free(source);
|
||||||
|
shmem_finalize();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
.Ve
.PP
|
||||||
|
.SH SEE ALSO
|
||||||
|
|
||||||
|
\fIintro_shmem\fP(3)
|
1
oshmem/shmem/man/man3/shmem_alltoall64.3in
Обычный файл
1
oshmem/shmem/man/man3/shmem_alltoall64.3in
Обычный файл
@ -0,0 +1 @@
|
|||||||
|
.so man3/shmem_alltoall32.3
|
1
oshmem/shmem/man/man3/shmem_alltoalls32.3in
Обычный файл
1
oshmem/shmem/man/man3/shmem_alltoalls32.3in
Обычный файл
@ -0,0 +1 @@
|
|||||||
|
.so man3/shmem_alltoall32.3
|
1
oshmem/shmem/man/man3/shmem_alltoalls64.3in
Обычный файл
1
oshmem/shmem/man/man3/shmem_alltoalls64.3in
Обычный файл
@ -0,0 +1 @@
|
|||||||
|
.so man3/shmem_alltoall32.3
|
Загрузка…
Ссылка в новой задаче
Block a user