From 9825157fc424d7872953d1acfbbf0d052537d0c3 Mon Sep 17 00:00:00 2001 From: Igor Ivanov Date: Fri, 18 Mar 2016 10:12:37 +0200 Subject: [PATCH] oshmem: Align OSHMEM API with spec v1.3 (Add man for alltoall) --- oshmem/shmem/man/man3/Makefile.extra | 4 + oshmem/shmem/man/man3/shmem_alltoall32.3in | 226 ++++++++++++++++++++ oshmem/shmem/man/man3/shmem_alltoall64.3in | 1 + oshmem/shmem/man/man3/shmem_alltoalls32.3in | 1 + oshmem/shmem/man/man3/shmem_alltoalls64.3in | 1 + 5 files changed, 233 insertions(+) create mode 100644 oshmem/shmem/man/man3/shmem_alltoall32.3in create mode 100644 oshmem/shmem/man/man3/shmem_alltoall64.3in create mode 100644 oshmem/shmem/man/man3/shmem_alltoalls32.3in create mode 100644 oshmem/shmem/man/man3/shmem_alltoalls64.3in diff --git a/oshmem/shmem/man/man3/Makefile.extra b/oshmem/shmem/man/man3/Makefile.extra index fbb23826d7..b73d9ce7ee 100644 --- a/oshmem/shmem/man/man3/Makefile.extra +++ b/oshmem/shmem/man/man3/Makefile.extra @@ -132,6 +132,10 @@ shmem_api_man_pages = \ shmem/man/man3/shmem_quiet.3 \ shmem/man/man3/shmem_broadcast32.3 \ shmem/man/man3/shmem_broadcast64.3 \ + shmem/man/man3/shmem_alltoall32.3 \ + shmem/man/man3/shmem_alltoall64.3 \ + shmem/man/man3/shmem_alltoalls32.3 \ + shmem/man/man3/shmem_alltoalls64.3 \ shmem/man/man3/shmem_collect32.3 \ shmem/man/man3/shmem_collect64.3 \ shmem/man/man3/shmem_fcollect32.3 \ diff --git a/oshmem/shmem/man/man3/shmem_alltoall32.3in b/oshmem/shmem/man/man3/shmem_alltoall32.3in new file mode 100644 index 0000000000..c2f08d6584 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_alltoall32.3in @@ -0,0 +1,226 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2016 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_ALLTOALL" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_alltoall32\fP(3), +\fIshmem_alltoall64\fP(3), +\fIshmem_alltoalls32\fP(3), +\fIshmem_alltoalls64\fP(3) +\- collective routine where each PE exchanges a fixed amount of data with all +other PEs in the Active set +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_alltoall32(void *target, const void *source, + size_t nelems, int PE_start, int logPE_stride, + int PE_size, long *pSync); + +void shmem_alltoall64(void *target, const void *source, + size_t nelems, int PE_start, int logPE_stride, + int PE_size, long *pSync); + +void shmem_alltoalls32(void *target, const void *source, + ptrdiff_t dst, ptrdiff_t sst, + size_t nelems, int PE_start, int logPE_stride, + int PE_size, long *pSync); + +void shmem_alltoalls64(void *target, const void *source, + ptrdiff_t dst, ptrdiff_t sst, + size_t nelems, int PE_start, int logPE_stride, + int PE_size, long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER dst, sst, nelems, PE_start, logPE_stride, PE_size +INTEGER pSync(SHMEM_ALLTOALL_SYNC_SIZE) + +CALL SHMEM_ALLTOALL32(target, source, nelems, +& PE_start, logPE_stride, PE_size, pSync) + +CALL SHMEM_ALLTOALL64(target, source, nelems, +& PE_start, logPE_stride, PE_size, pSync) + +CALL SHMEM_ALLTOALLS32(target, source, dst, sst, nelems, +& PE_start, logPE_stride, PE_size, pSync) + +CALL SHMEM_ALLTOALLS64(target, source, dst, sst, nelems, +& PE_start, logPE_stride, PE_size, pSync) +.Ve +.SH DESCRIPTION + +.PP +The shmem_alltoalls routines are collective routines. Each PE in the Active set exchanges nelems strided +data elements of size 32 bits (for shmem_alltoalls32) or 64 bits (for shmem_alltoalls64) with all other PEs +in the set. Both strides, dst and sst, must be greater than or equal to 1. The sst*jth block sent from PE i to +PE j is placed in the dst*ith block of the dest data object on PE j.
+As with all OpenSHMEM collective routines, these routines assume that only PEs in the Active set call the +routine. If a PE not in the Active set calls an OpenSHMEM collective routine, undefined behavior results. +The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on all PEs in +the Active set. The same dest and source data objects, and the same pSync work array must be passed to all +PEs in the Active set. +Before any PE calls a shmem_alltoalls routine, the following conditions must exist (synchronization via +a barrier or some other method is often needed to ensure this): The pSync array on all PEs in the Active set +is not still in use from a prior call to a shmem_alltoalls routine. The dest data object on all PEs in the +Active set is ready to accept the shmem_alltoalls data. +Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest symmetric data +object is completely updated and the data has been copied out of the source data object. The values in the +pSync array are restored to the original values. +.PP +The arguments are as follows: +.TP +A symmetric data object with one of the following data types: +.RS +.TP +\fBshmem_alltoall32\fP: Any noncharacter type that +has an element size of 32 bits. No Fortran derived types or C/C++ structures are allowed. +.TP +\fBshmem_alltoall64\fP: Any noncharacter type that has an element size +of 64 bits. No Fortran derived types or C/C++ structures are allowed. +.RE +.RS +.PP +.RE +target +A symmetric data object large enough to receive the combined total of +nelems elements from each PE in the Active set. +.TP +source +A symmetric data object that contains nelems elements of data for each +PE in the Active set, ordered according to destination PE. +.TP +dst +The stride between consecutive elements of the dest data object. The +stride is scaled by the element size. A value of 1 indicates contiguous +data. dst must be of type ptrdiff_t. 
If you are using Fortran, it must be +a default integer value. +.TP +sst +The stride between consecutive elements of the source data object. The +stride is scaled by the element size. A value of 1 indicates contiguous +data. sst must be of type ptrdiff_t. If you are using Fortran, it must be a +default integer value. +.TP +nelems +The number of elements to exchange for each PE. nelems must be of +type size_t for C/C++. If you are using Fortran, it must be a default +integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.PP +.TP +pSync +A symmetric work array. In C/C++, pSync must be of type long and size +_SHMEM_ALLTOALL_SYNC_SIZE. +In Fortran, pSync must be of type integer and size SHMEM_ALLTOALL_SYNC_SIZE. Every +element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++) +or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter +the alltoall routine. +.PP +Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest +symmetric data object is completely updated and the data has been copied out of the source +data object. The values in the pSync array are restored to the original values. +.PP +The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on +all PEs in the active set. The same target and source data objects and the same pSync work +array must be passed to all PEs in the active set. 
+.PP +Before any PE calls an alltoall routine, you must ensure that the following conditions exist +(synchronization via a barrier or some other method is often needed to ensure this): The +pSync array on all PEs in the active set is not still in use from a prior call to an alltoall +routine. The target array on all PEs in the active set is ready to accept the alltoall data. +.SH NOTES + +The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&. +.PP +All SHMEM alltoall routines restore pSync to its original contents. Multiple calls to SHMEM +routines that use the same pSync array do not require that pSync be reinitialized after the +first call. +.PP +You must ensure that the pSync array is not being updated by any PE in the active set +while any of the PEs participates in processing of a SHMEM alltoall routine. Be careful to +avoid these situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized pSync +before any of them enter a SHMEM routine called with the pSync synchronization array. A +pSync array may be reused on a subsequent SHMEM alltoall routine only if none of the PEs +in the active set are still processing a prior SHMEM alltoall routine call that used the same +pSync array. In general, this can be ensured only by doing some type of synchronization. +However, in the special case of SHMEM routines being called with the same active set, you +can allocate two pSync arrays and alternate between them on successive calls. +.PP +.SH EXAMPLES + +.PP +C/C++ example: +.Vb +#include <shmem.h> +#include <stdio.h> +long pSync[SHMEM_ALLTOALL_SYNC_SIZE]; +int main(void) +{ +int64_t *source, *dest; +int i, count, pe; +shmem_init(); +count = 2; +dest = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t)); +source = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t)); +/* assign source values */ +for (pe=0; pe