oshmem: Align OSHMEM API with spec v1.3 (update scoll/basic)
Этот коммит содержится в:
родитель
9825157fc4
Коммит
1bed5d8aee
@ -193,24 +193,6 @@ OSHMEM_DECLSPEC void shmem_iput32(void* target, const void* source, ptrdiff_t ts
|
||||
OSHMEM_DECLSPEC void shmem_iput64(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_iput128(void* target, const void* source, ptrdiff_t tst, ptrdiff_t sst,size_t len, int pe);
|
||||
|
||||
/*
|
||||
* Nonblocking put routines
|
||||
*/
|
||||
OSHMEM_DECLSPEC void shmem_putmem_nbi(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_char_put_nbi(char *target, const char *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_short_put_nbi(short *target, const short *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_int_put_nbi(int* target, const int* source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_long_put_nbi(long *target, const long *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longlong_put_nbi(long long *target, const long long *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_float_put_nbi(float *target, const float *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_double_put_nbi(double *target, const double *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_longdouble_put_nbi(long double *target, const long double *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_put8_nbi(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_put16_nbi(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_put32_nbi(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_put64_nbi(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_put128_nbi(void *target, const void *source, size_t len, int pe);
|
||||
|
||||
/*
|
||||
* Elemental get routines
|
||||
*/
|
||||
@ -346,6 +328,11 @@ OSHMEM_DECLSPEC void shmem_collect32(void *target, const void *source, size_t nl
|
||||
OSHMEM_DECLSPEC void shmem_collect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_fcollect32(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_fcollect64(void *target, const void *source, size_t nlong, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_alltoall32(void *target, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_alltoall64(void *target, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_alltoalls32(void *target, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
OSHMEM_DECLSPEC void shmem_alltoalls64(void *target, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync);
|
||||
|
||||
|
||||
/*
|
||||
* Reduction routines
|
||||
|
@ -81,7 +81,8 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nlong,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync,
|
||||
int alg);
|
||||
|
||||
|
118
oshmem/mca/scoll/basic/scoll_basic_alltoall.c
Обычный файл
118
oshmem/mca/scoll/basic/scoll_basic_alltoall.c
Обычный файл
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/op/op.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "scoll_basic.h"
|
||||
|
||||
/*
 * Forward declaration of the "simple" all-to-all exchange used by
 * mca_scoll_basic_alltoall(); the definition appears later in this file.
 *
 * Parameter names mirror the definition (tst = target stride,
 * sst = source stride) so the prototype and the implementation read
 * the same way.
 */
static int _algorithm_simple(struct oshmem_group_t *group,
                             void *target,
                             const void *source,
                             ptrdiff_t tst, ptrdiff_t sst,
                             size_t nelems,
                             size_t element_size,
                             long *pSync);
|
||||
|
||||
int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync,
|
||||
int alg)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
|
||||
/* Arguments validation */
|
||||
if (!group) {
|
||||
SCOLL_ERROR("Active set (group) of PE is not defined");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Check if this PE is part of the group */
|
||||
if ((rc == OSHMEM_SUCCESS) && oshmem_proc_group_is_member(group)) {
|
||||
int i = 0;
|
||||
|
||||
if (pSync) {
|
||||
rc = _algorithm_simple(group,
|
||||
target,
|
||||
source,
|
||||
dst,
|
||||
sst,
|
||||
nelems,
|
||||
element_size,
|
||||
pSync);
|
||||
} else {
|
||||
SCOLL_ERROR("Incorrect argument pSync");
|
||||
rc = OSHMEM_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* Restore initial values */
|
||||
SCOLL_VERBOSE(12,
|
||||
"PE#%d Restore special synchronization array",
|
||||
group->my_pe);
|
||||
for (i = 0; pSync && (i < _SHMEM_ALLTOALL_SYNC_SIZE); i++) {
|
||||
pSync[i] = _SHMEM_SYNC_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int _algorithm_simple(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t tst, ptrdiff_t sst,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync)
|
||||
{
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int pe_cur;
|
||||
int i;
|
||||
int j;
|
||||
int k;
|
||||
|
||||
SCOLL_VERBOSE(14,
|
||||
"[#%d] send data to all PE in the group",
|
||||
group->my_pe);
|
||||
j = oshmem_proc_group_find_id(group, group->my_pe);
|
||||
for (i = 0; i < group->proc_count; i++) {
|
||||
/* index permutation for better distribution of traffic */
|
||||
k = (((j)+(i))%(group->proc_count));
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[k]);
|
||||
rc = MCA_SPML_CALL(put(
|
||||
(void *)((char *)target + j * tst * nelems * element_size),
|
||||
nelems * element_size,
|
||||
(void *)((char *)source + i * sst * nelems * element_size),
|
||||
pe_cur));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Wait for operation completion */
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe);
|
||||
rc = BARRIER_FUNC(group,
|
||||
(pSync + 1),
|
||||
SCOLL_DEFAULT_ALG);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
@ -142,7 +142,8 @@ typedef int (*mca_scoll_base_module_alltoall_fn_t)(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
ptrdiff_t dst, ptrdiff_t sst,
|
||||
size_t nlong,
|
||||
size_t nelems,
|
||||
size_t element_size,
|
||||
long *pSync,
|
||||
int alg);
|
||||
|
||||
|
@ -44,7 +44,7 @@ static void _shmem_alltoall(void *target,
|
||||
RUNTIME_CHECK_ADDR(target); \
|
||||
RUNTIME_CHECK_ADDR(source); \
|
||||
\
|
||||
_shmem_alltoall(target, source, 1, 1, nelems * element_size, \
|
||||
_shmem_alltoall(target, source, 1, 1, nelems, element_size, \
|
||||
PE_start, logPE_stride, PE_size, \
|
||||
pSync); \
|
||||
}
|
||||
@ -63,7 +63,7 @@ static void _shmem_alltoall(void *target,
|
||||
RUNTIME_CHECK_ADDR(target); \
|
||||
RUNTIME_CHECK_ADDR(source); \
|
||||
\
|
||||
_shmem_alltoall(target, source, dst, sst, nelems * element_size, \
|
||||
_shmem_alltoall(target, source, dst, sst, nelems, element_size, \
|
||||
PE_start, logPE_stride, PE_size, \
|
||||
pSync); \
|
||||
}
|
||||
|
@ -169,8 +169,8 @@ SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||
rc = group->g_scoll.scoll_alltoall( group, \
|
||||
FPTR_2_VOID_PTR(target), \
|
||||
FPTR_2_VOID_PTR(source), \
|
||||
OMPI_FINT_2_INT(*dst), \
|
||||
OMPI_FINT_2_INT(*sst), \
|
||||
OMPI_FINT_2_INT(*dst), \
|
||||
OMPI_FINT_2_INT(*sst), \
|
||||
OMPI_FINT_2_INT(*nlong), \
|
||||
op->dt_size, \
|
||||
FPTR_2_VOID_PTR(pSync), SCOLL_DEFAULT_ALG );\
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user