diff --git a/oshmem/Makefile.am b/oshmem/Makefile.am index 4fe3fb418c..ce2ffac368 100644 --- a/oshmem/Makefile.am +++ b/oshmem/Makefile.am @@ -1,5 +1,5 @@ # -# Copyright (c) 2013 Mellanox Technologies, Inc. +# Copyright (c) 2013-2015 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2014 Intel, Inc. All rights reserved. @@ -94,6 +94,25 @@ include proc/Makefile.am include request/Makefile.am include runtime/Makefile.am include shmem/Makefile.am +include shmem/man/man3/Makefile.extra include mca/Makefile.am include tools/Makefile.am include util/Makefile.am + +# Ensure that the man page directory exists before we try to make man +# page files (because oshmem/shmem/man/man3 has no config.status-generated +# Makefile) +dir_stamp = $(top_builddir)/$(subdir)/shmem/man/man3/.dir-stamp + +# Also ensure that the man pages are rebuilt if the opal_config.h file +# changes (e.g., configure was run again, meaning that the release +# date or version may have changed) +$(nodist_man_MANS): $(dir_stamp) $(top_builddir)/opal/include/opal_config.h + +$(dir_stamp): + $(MKDIR_P) `dirname $@` + touch "$@" + +# Remove the generated man pages +distclean-local: + rm -f $(nodist_man_MANS) $(dir_stamp) diff --git a/oshmem/shmem/Makefile.am b/oshmem/shmem/Makefile.am index b8317e4f7b..14d9e4d9ff 100644 --- a/oshmem/shmem/Makefile.am +++ b/oshmem/shmem/Makefile.am @@ -7,6 +7,8 @@ # $HEADER$ # +EXTRA_DIST = + headers += shmem/shmem_api_logger.h \ shmem/shmem_lock.h diff --git a/oshmem/shmem/man/man3/Makefile.extra b/oshmem/shmem/man/man3/Makefile.extra new file mode 100644 index 0000000000..965bbbb97c --- /dev/null +++ b/oshmem/shmem/man/man3/Makefile.extra @@ -0,0 +1,180 @@ +# -*- makefile -*- +# Copyright (c) 2015 Mellanox Technologies, Inc. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +include $(top_srcdir)/Makefile.ompi-rules + +shmem_api_man_pages = \ + shmem/man/man3/intro_shmem.3 \ + shmem/man/man3/OpenSHMEM.3 \ + shmem/man/man3/start_pes.3 \ + shmem/man/man3/_num_pes.3 \ + shmem/man/man3/_my_pe.3 \ + shmem/man/man3/shmem_pe_accessible.3 \ + shmem/man/man3/shmem_addr_accessible.3 \ + shmem/man/man3/shmalloc.3 \ + shmem/man/man3/shmemalign.3 \ + shmem/man/man3/shrealloc.3 \ + shmem/man/man3/shfree.3 \ + shmem/man/man3/shmem_ptr.3 \ + shmem/man/man3/shmem_char_p.3 \ + shmem/man/man3/shmem_short_p.3 \ + shmem/man/man3/shmem_int_p.3 \ + shmem/man/man3/shmem_long_p.3 \ + shmem/man/man3/shmem_float_p.3 \ + shmem/man/man3/shmem_double_p.3 \ + shmem/man/man3/shmem_longlong_p.3 \ + shmem/man/man3/shmem_longdouble_p.3 \ + shmem/man/man3/shmem_char_put.3 \ + shmem/man/man3/shmem_short_put.3 \ + shmem/man/man3/shmem_int_put.3 \ + shmem/man/man3/shmem_long_put.3 \ + shmem/man/man3/shmem_float_put.3 \ + shmem/man/man3/shmem_double_put.3 \ + shmem/man/man3/shmem_longlong_put.3 \ + shmem/man/man3/shmem_longdouble_put.3 \ + shmem/man/man3/shmem_put32.3 \ + shmem/man/man3/shmem_put64.3 \ + shmem/man/man3/shmem_put128.3 \ + shmem/man/man3/shmem_putmem.3 \ + shmem/man/man3/shmem_short_iput.3 \ + shmem/man/man3/shmem_int_iput.3 \ + shmem/man/man3/shmem_long_iput.3 \ + shmem/man/man3/shmem_float_iput.3 \ + shmem/man/man3/shmem_double_iput.3 \ + shmem/man/man3/shmem_longlong_iput.3 \ + shmem/man/man3/shmem_longdouble_iput.3 \ + shmem/man/man3/shmem_iput32.3 \ + shmem/man/man3/shmem_iput64.3 \ + shmem/man/man3/shmem_iput128.3 \ + shmem/man/man3/shmem_char_g.3 \ + shmem/man/man3/shmem_short_g.3 \ + shmem/man/man3/shmem_int_g.3 \ + shmem/man/man3/shmem_long_g.3 \ + shmem/man/man3/shmem_float_g.3 \ + shmem/man/man3/shmem_double_g.3 \ + shmem/man/man3/shmem_longlong_g.3 \ + shmem/man/man3/shmem_longdouble_g.3 \ + shmem/man/man3/shmem_char_get.3 \ + shmem/man/man3/shmem_short_get.3 \ + 
shmem/man/man3/shmem_int_get.3 \ + shmem/man/man3/shmem_long_get.3 \ + shmem/man/man3/shmem_float_get.3 \ + shmem/man/man3/shmem_double_get.3 \ + shmem/man/man3/shmem_longlong_get.3 \ + shmem/man/man3/shmem_longdouble_get.3 \ + shmem/man/man3/shmem_get32.3 \ + shmem/man/man3/shmem_get64.3 \ + shmem/man/man3/shmem_get128.3 \ + shmem/man/man3/shmem_getmem.3 \ + shmem/man/man3/shmem_short_iget.3 \ + shmem/man/man3/shmem_int_iget.3 \ + shmem/man/man3/shmem_long_iget.3 \ + shmem/man/man3/shmem_float_iget.3 \ + shmem/man/man3/shmem_double_iget.3 \ + shmem/man/man3/shmem_longlong_iget.3 \ + shmem/man/man3/shmem_longdouble_iget.3 \ + shmem/man/man3/shmem_iget32.3 \ + shmem/man/man3/shmem_iget64.3 \ + shmem/man/man3/shmem_iget128.3 \ + shmem/man/man3/shmem_swap.3 \ + shmem/man/man3/shmem_int_swap.3 \ + shmem/man/man3/shmem_long_swap.3 \ + shmem/man/man3/shmem_longlong_swap.3 \ + shmem/man/man3/shmem_float_swap.3 \ + shmem/man/man3/shmem_double_swap.3 \ + shmem/man/man3/shmem_int_cswap.3 \ + shmem/man/man3/shmem_long_cswap.3 \ + shmem/man/man3/shmem_longlong_cswap.3 \ + shmem/man/man3/shmem_int_fadd.3 \ + shmem/man/man3/shmem_long_fadd.3 \ + shmem/man/man3/shmem_longlong_fadd.3 \ + shmem/man/man3/shmem_int_finc.3 \ + shmem/man/man3/shmem_long_finc.3 \ + shmem/man/man3/shmem_longlong_finc.3 \ + shmem/man/man3/shmem_int_add.3 \ + shmem/man/man3/shmem_long_add.3 \ + shmem/man/man3/shmem_longlong_add.3 \ + shmem/man/man3/shmem_int_inc.3 \ + shmem/man/man3/shmem_long_inc.3 \ + shmem/man/man3/shmem_longlong_inc.3 \ + shmem/man/man3/shmem_set_lock.3 \ + shmem/man/man3/shmem_clear_lock.3 \ + shmem/man/man3/shmem_test_lock.3 \ + shmem/man/man3/shmem_wait.3 \ + shmem/man/man3/shmem_short_wait.3 \ + shmem/man/man3/shmem_int_wait.3 \ + shmem/man/man3/shmem_long_wait.3 \ + shmem/man/man3/shmem_longlong_wait.3 \ + shmem/man/man3/shmem_wait_until.3 \ + shmem/man/man3/shmem_short_wait_until.3 \ + shmem/man/man3/shmem_int_wait_until.3 \ + shmem/man/man3/shmem_long_wait_until.3 \ + 
shmem/man/man3/shmem_longlong_wait_until.3 \ + shmem/man/man3/shmem_barrier.3 \ + shmem/man/man3/shmem_barrier_all.3 \ + shmem/man/man3/shmem_fence.3 \ + shmem/man/man3/shmem_quiet.3 \ + shmem/man/man3/shmem_broadcast32.3 \ + shmem/man/man3/shmem_broadcast64.3 \ + shmem/man/man3/shmem_collect32.3 \ + shmem/man/man3/shmem_collect64.3 \ + shmem/man/man3/shmem_fcollect32.3 \ + shmem/man/man3/shmem_fcollect64.3 \ + shmem/man/man3/shmem_short_and_to_all.3 \ + shmem/man/man3/shmem_int_and_to_all.3 \ + shmem/man/man3/shmem_long_and_to_all.3 \ + shmem/man/man3/shmem_longlong_and_to_all.3 \ + shmem/man/man3/shmem_short_or_to_all.3 \ + shmem/man/man3/shmem_int_or_to_all.3 \ + shmem/man/man3/shmem_long_or_to_all.3 \ + shmem/man/man3/shmem_longlong_or_to_all.3 \ + shmem/man/man3/shmem_short_xor_to_all.3 \ + shmem/man/man3/shmem_int_xor_to_all.3 \ + shmem/man/man3/shmem_long_xor_to_all.3 \ + shmem/man/man3/shmem_longlong_xor_to_all.3 \ + shmem/man/man3/shmem_short_max_to_all.3 \ + shmem/man/man3/shmem_int_max_to_all.3 \ + shmem/man/man3/shmem_long_max_to_all.3 \ + shmem/man/man3/shmem_longlong_max_to_all.3 \ + shmem/man/man3/shmem_float_max_to_all.3 \ + shmem/man/man3/shmem_double_max_to_all.3 \ + shmem/man/man3/shmem_longdouble_max_to_all.3 \ + shmem/man/man3/shmem_short_min_to_all.3 \ + shmem/man/man3/shmem_int_min_to_all.3 \ + shmem/man/man3/shmem_long_min_to_all.3 \ + shmem/man/man3/shmem_longlong_min_to_all.3 \ + shmem/man/man3/shmem_float_min_to_all.3 \ + shmem/man/man3/shmem_double_min_to_all.3 \ + shmem/man/man3/shmem_longdouble_min_to_all.3 \ + shmem/man/man3/shmem_short_sum_to_all.3 \ + shmem/man/man3/shmem_int_sum_to_all.3 \ + shmem/man/man3/shmem_long_sum_to_all.3 \ + shmem/man/man3/shmem_longlong_sum_to_all.3 \ + shmem/man/man3/shmem_float_sum_to_all.3 \ + shmem/man/man3/shmem_double_sum_to_all.3 \ + shmem/man/man3/shmem_complexf_sum_to_all.3 \ + shmem/man/man3/shmem_complexd_sum_to_all.3 \ + shmem/man/man3/shmem_short_prod_to_all.3 \ + 
shmem/man/man3/shmem_int_prod_to_all.3 \ + shmem/man/man3/shmem_long_prod_to_all.3 \ + shmem/man/man3/shmem_longlong_prod_to_all.3 \ + shmem/man/man3/shmem_float_prod_to_all.3 \ + shmem/man/man3/shmem_double_prod_to_all.3 \ + shmem/man/man3/shmem_longdouble_prod_to_all.3 \ + shmem/man/man3/shmem_complexf_prod_to_all.3 \ + shmem/man/man3/shmem_complexd_prod_to_all.3 \ + shmem/man/man3/shmem_udcflush.3 \ + shmem/man/man3/shmem_udcflush_line.3 \ + shmem/man/man3/shmem_set_cache_inv.3 \ + shmem/man/man3/shmem_set_cache_line_inv.3 \ + shmem/man/man3/shmem_clear_cache_inv.3 \ + shmem/man/man3/shmem_clear_cache_line_inv.3 + +nodist_man_MANS += $(shmem_api_man_pages) +EXTRA_DIST += $(shmem_api_man_pages:.3=.3in) diff --git a/oshmem/shmem/man/man3/OpenSHMEM.3in b/oshmem/shmem/man/man3/OpenSHMEM.3in new file mode 100644 index 0000000000..97469c502b --- /dev/null +++ b/oshmem/shmem/man/man3/OpenSHMEM.3in @@ -0,0 +1 @@ +.so man3/intro_shmem.3 diff --git a/oshmem/shmem/man/man3/_my_pe.3in b/oshmem/shmem/man/man3/_my_pe.3in new file mode 100644 index 0000000000..ddc43b4ff7 --- /dev/null +++ b/oshmem/shmem/man/man3/_my_pe.3in @@ -0,0 +1,41 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "MY\\_PE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +my_pe, _my_pe, shmem_my_pe \- Returns the virtual PE number of the calling PE. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> +int _my_pe (void); +int shmem_my_pe (void); +.Ve +Fortran: +.Vb +include 'mpp/shmem.fh' +I = MY_PE () +I = SHMEM_MY_PE () +.Ve +.SH DESCRIPTION + +my_pe() or shmem_my_pe() return the processing element (PE) number of the calling PE. It accepts no +arguments. The result is an integer between 0 and npes \- 1, where npes is the total +number of PEs executing the current program. 
+.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fInum_pes\fP(3), +\fIstart_pes\fP(3) diff --git a/oshmem/shmem/man/man3/_num_pes.3in b/oshmem/shmem/man/man3/_num_pes.3in new file mode 100644 index 0000000000..21ab515dfd --- /dev/null +++ b/oshmem/shmem/man/man3/_num_pes.3in @@ -0,0 +1,39 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "NUM\\_PES" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +num_pes, _num_pes, shmem_n_pes \- Returns the number of processing elements (PEs) used to run the application. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> +int _num_pes (void); +int shmem_n_pes (void); +.Ve +Fortran: +.Vb +include 'mpp/shmem.fh' +I = NUM_PES () +I = SHMEM_N_PES () +.Ve +.SH DESCRIPTION + +num_pes() or shmem_n_pes() return the total number of PEs running in an application. +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fImy_pe\fP(3), +\fIstart_pes\fP(3) diff --git a/oshmem/shmem/man/man3/intro_shmem.3in b/oshmem/shmem/man/man3/intro_shmem.3in new file mode 100644 index 0000000000..caef30b881 --- /dev/null +++ b/oshmem/shmem/man/man3/intro_shmem.3in @@ -0,0 +1,1312 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "INTRO\\_SHMEM" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +intro_shmem \- Introduction to the OpenSHMEM programming model +.PP +.SH DESCRIPTION + +The SHMEM programming model consists of library routines that provide low\-latency, +high\-bandwidth communication for use in highly parallelized scalable programs. The +routines in the OpenSHMEM application programming interface (API) provide a programming +model for exchanging data between cooperating parallel processes. 
The resulting programs +are similar in style to Message Passing Interface (MPI) programs. The SHMEM API can +be used either alone or in combination with MPI routines in the same parallel program. +.PP +An OpenSHMEM program is SPMD (single program, multiple data) in style. The SHMEM +processes, called processing elements or PEs, all start at the same time and they all run the +same program. Usually the PEs perform computation on their own subdomains of the larger +problem and periodically communicate with other PEs to exchange information on +which the next computation phase depends. +.PP +The OpenSHMEM routines minimize the overhead associated with data transfer requests, +maximize bandwidth and minimize data latency. Data latency is the period of time that +starts when a PE initiates a transfer of data and ends when a PE can use the data. +OpenSHMEM routines support remote data transfer through put operations, which transfer +data to a different PE, get operations, which transfer data from a different PE, and remote +pointers, which allow direct references to data objects owned by another PE. Other +operations supported are collective broadcast and reduction, barrier synchronization, and +atomic memory operations. An atomic memory operation is an atomic read\-and\-update +operation, such as a fetch\-and\-increment, on a remote or local data object. +.PP +.SH OPENSHMEM ROUTINES + +This section lists the significant OpenSHMEM message\-passing routines. 
+.TP +PE queries +.PP +.RS +.TP +.B * +C/C++ only: +.RS +.PP +.RS +.RE +.TP +.B * +\fI_num_pes\fP(3) +.TP +.B * +\fI_my_pe\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.PP +.RS +.RE +.TP +.B * +\fINUM_PES\fP(3) +.TP +.B * +\fIMY_PE\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Elemental data put routines +.PP +.RS +.TP +.B * +C/C++ only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_double_p\fP(3) +.TP +.B * +\fIshmem_float_p\fP(3) +.TP +.B * +\fIshmem_int_p\fP(3) +.TP +.B * +\fIshmem_long_p\fP(3) +.TP +.B * +\fIshmem_short_p.\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Block data put routines +.PP +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_put32\fP(3) +.TP +.B * +\fIshmem_put64\fP(3) +.TP +.B * +\fIshmem_put128\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +C/C++ only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_double_put\fP(3) +.TP +.B * +\fIshmem_float_put\fP(3) +.TP +.B * +\fIshmem_int_put\fP(3) +.TP +.B * +\fIshmem_long_put\fP(3) +.TP +.B * +\fIshmem_short_put.\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_complex_put\fP(3) +.TP +.B * +\fIshmem_integer_put\fP(3) +.TP +.B * +\fIshmem_logical_put\fP(3) +.TP +.B * +\fIshmem_real_put\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Elemental data get routines +.PP +.RS +.TP +.B * +C/C++ only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_double_g\fP(3) +.TP +.B * +\fIshmem_float_g\fP(3) +.TP +.B * +\fIshmem_int_g\fP(3) +.TP +.B * +\fIshmem_long_g\fP(3) +.TP +.B * +\fIshmem_short_g\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Block data get routines +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_get32\fP(3) +.TP +.B * +\fIshmem_get64\fP(3) +.TP +.B * +\fIshmem_get128\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +C/C++ only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_double_get\fP(3) +.TP +.B * +\fIshmem_float_get\fP(3) +.TP +.B * +\fIshmem_int_get\fP(3) +.TP +.B * +\fIshmem_long_get\fP(3) +.TP +.B * 
+\fIshmem_short_get\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_complex_get\fP(3) +.TP +.B * +\fIshmem_integer_get\fP(3) +.TP +.B * +\fIshmem_logical_get\fP(3) +.TP +.B * +\fIshmem_real_get\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Strided put routines +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_iput32\fP(3) +.TP +.B * +\fIshmem_iput64\fP(3) +.TP +.B * +\fIshmem_iput128\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +C/C++ only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_double_iput\fP(3) +.TP +.B * +\fIshmem_float_iput\fP(3) +.TP +.B * +\fIshmem_int_iput\fP(3) +.TP +.B * +\fIshmem_long_iput\fP(3) +.TP +.B * +\fIshmem_short_iput\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_complex_iput\fP(3) +.TP +.B * +\fIshmem_integer_iput\fP(3) +.TP +.B * +\fIshmem_logical_iput\fP(3) +.TP +.B * +\fIshmem_real_iput\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Strided get routines +.PP +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_iget32\fP(3) +.TP +.B * +\fIshmem_iget64\fP(3) +.TP +.B * +\fIshmem_iget128\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +C/C++ only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_double_iget\fP(3) +.TP +.B * +\fIshmem_float_iget\fP(3) +.TP +.B * +\fIshmem_int_iget\fP(3) +.TP +.B * +\fIshmem_long_iget\fP(3) +.TP +.B * +\fIshmem_short_iget\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_complex_iget\fP(3) +.TP +.B * +\fIshmem_integer_iget\fP(3) +.TP +.B * +\fIshmem_logical_iget\fP(3) +.TP +.B * +\fIshmem_real_iget\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Point\-to\-point synchronization routines +.RS +.TP +.B * +C/C++ only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_int_wait\fP(3) +.TP +.B * +\fIshmem_int_wait_until\fP(3) +.TP +.B * +\fIshmem_long_wait\fP(3) +.TP +.B * +\fIshmem_long_wait_until\fP(3) +.TP +.B * +\fIshmem_longlong_wait\fP(3) +.TP +.B * 
+\fIshmem_longlong_wait_until\fP(3) +.TP +.B * +\fIshmem_short_wait\fP(3) +.TP +.B * +\fIshmem_short_wait_until\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_int4_wait\fP(3) +.TP +.B * +\fIshmem_int4_wait_until\fP(3) +.TP +.B * +\fIshmem_int8_wait\fP(3) +.TP +.B * +\fIshmem_int8_wait_until\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Barrier synchronization routines +.PP +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_barrier_all\fP(3) +.TP +.B * +\fIshmem_barrier\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Atomic memory fetch\-and\-operate (fetch\-op) routines +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.TP +.B * +shmem_swap +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Reduction routines +.RS +.TP +.B * +C/C++ only: +.RS +.TP +.B * +\fIshmem_int_and_to_all\fP(3) +.TP +.B * +\fIshmem_long_and_to_all\fP(3) +.TP +.B * +\fIshmem_longlong_and_to_all\fP(3) +.TP +.B * +\fIshmem_short_and_to_all\fP(3) +.TP +.B * +\fIshmem_double_max_to_all\fP(3) +.TP +.B * +\fIshmem_float_max_to_all\fP(3) +.TP +.B * +\fIshmem_int_max_to_all\fP(3) +.TP +.B * +\fIshmem_long_max_to_all\fP(3) +.TP +.B * +\fIshmem_longlong_max_to_all\fP(3) +.TP +.B * +\fIshmem_short_max_to_all\fP(3) +.TP +.B * +\fIshmem_double_min_to_all\fP(3) +.TP +.B * +\fIshmem_float_min_to_all\fP(3) +.TP +.B * +\fIshmem_int_min_to_all\fP(3) +.TP +.B * +\fIshmem_long_min_to_all\fP(3) +.TP +.B * +\fIshmem_longlong_min_to_all\fP(3) +.TP +.B * +\fIshmem_short_min_to_all\fP(3) +.TP +.B * +\fIshmem_double_sum_to_all\fP(3) +.TP +.B * +\fIshmem_float_sum_to_all\fP(3) +.TP +.B * +\fIshmem_int_sum_to_all\fP(3) +.TP +.B * +\fIshmem_long_sum_to_all\fP(3) +.TP +.B * +\fIshmem_longlong_sum_to_all\fP(3) +.TP +.B * +\fIshmem_short_sum_to_all\fP(3) +.TP +.B * +\fIshmem_double_prod_to_all\fP(3) +.TP +.B * +\fIshmem_float_prod_to_all\fP(3) +.TP +.B * +\fIshmem_int_prod_to_all\fP(3) +.TP +.B * +\fIshmem_long_prod_to_all\fP(3) +.TP +.B * +\fIshmem_longlong_prod_to_all\fP(3) 
+.TP +.B * +\fIshmem_short_prod_to_all\fP(3) +.TP +.B * +\fIshmem_int_or_to_all\fP(3) +.TP +.B * +\fIshmem_long_or_to_all\fP(3) +.TP +.B * +\fIshmem_longlong_or_to_all\fP(3) +.TP +.B * +\fIshmem_short_or_to_all\fP(3) +.TP +.B * +\fIshmem_int_xor_to_all\fP(3) +.TP +.B * +\fIshmem_long_xor_to_all\fP(3) +.TP +.B * +\fIshmem_longlong_xor_to_all\fP(3) +.TP +.B * +\fIshmem_short_xor_to_all\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.TP +.B * +\fIshmem_int4_and_to_all\fP(3) +.TP +.B * +\fIshmem_int8_and_to_all\fP(3) +.TP +.B * +\fIshmem_real4_max_to_all\fP(3) +.TP +.B * +\fIshmem_real8_max_to_all\fP(3) +.TP +.B * +\fIshmem_int4_max_to_all\fP(3) +.TP +.B * +\fIshmem_int8_max_to_all\fP(3) +.TP +.B * +\fIshmem_real4_min_to_all\fP(3) +.TP +.B * +\fIshmem_real8_min_to_all\fP(3) +.TP +.B * +\fIshmem_int4_min_to_all\fP(3) +.TP +.B * +\fIshmem_int8_min_to_all\fP(3) +.TP +.B * +\fIshmem_real4_sum_to_all\fP(3) +.TP +.B * +\fIshmem_real8_sum_to_all\fP(3) +.TP +.B * +\fIshmem_int4_sum_to_all\fP(3) +.TP +.B * +\fIshmem_int8_sum_to_all\fP(3) +.TP +.B * +\fIshmem_real4_prod_to_all\fP(3) +.TP +.B * +\fIshmem_real8_prod_to_all\fP(3) +.TP +.B * +\fIshmem_int4_prod_to_all\fP(3) +.TP +.B * +\fIshmem_int8_prod_to_all\fP(3) +.TP +.B * +\fIshmem_int4_or_to_all\fP(3) +.TP +.B * +\fIshmem_int8_or_to_all\fP(3) +.TP +.B * +\fIshmem_int4_xor_to_all\fP(3) +.TP +.B * +\fIshmem_int8_xor_to_all\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Broadcast routines +.PP +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_broadcast32\fP(3) +.TP +.B * +\fIshmem_broadcast64\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Cache management routines +.PP +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_udcflush\fP(3) +.TP +.B * +\fIshmem_udcflush_line\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Byte\-granularity block put routines +.PP +.RS +.TP +.B * +C/C++ and Fortran +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_putmem\fP(3) +.TP +.B * 
+\fIshmem_getmem\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_character_put\fP(3) +.TP +.B * +\fIshmem_character_get\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Collect routines +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_collect32\fP(3) +.TP +.B * +\fIshmem_collect64\fP(3) +.TP +.B * +\fIshmem_fcollect32\fP(3) +.TP +.B * +\fIshmem_fcollect64\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Atomic memory fetch\-and\-operate (fetch\-op) routines +.RS +.TP +.B * +C/C++ only: +.RS +.TP +.B * +\fIshmem_double_swap\fP(3) +.TP +.B * +\fIshmem_float_swap\fP(3) +.TP +.B * +\fIshmem_int_cswap\fP(3) +.TP +.B * +\fIshmem_int_fadd\fP(3) +.TP +.B * +\fIshmem_int_finc\fP(3) +.TP +.B * +\fIshmem_int_swap\fP(3) +.TP +.B * +\fIshmem_long_cswap\fP(3) +.TP +.B * +\fIshmem_long_fadd\fP(3) +.TP +.B * +\fIshmem_long_finc\fP(3) +.TP +.B * +\fIshmem_long_swap\fP(3) +.TP +.B * +\fIshmem_longlong_cswap\fP(3) +.TP +.B * +\fIshmem_longlong_fadd\fP(3) +.TP +.B * +\fIshmem_longlong_finc\fP(3) +.TP +.B * +\fIshmem_longlong_swap\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.TP +.B * +\fIshmem_int4_cswap\fP(3) +.TP +.B * +\fIshmem_int4_fadd\fP(3) +.TP +.B * +\fIshmem_int4_finc\fP(3) +.TP +.B * +\fIshmem_int4_swap\fP(3) +.TP +.B * +\fIshmem_int8_swap\fP(3) +.TP +.B * +\fIshmem_real4_swap\fP(3) +.TP +.B * +\fIshmem_real8_swap\fP(3) +.TP +.B * +\fIshmem_int8_cswap\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Atomic memory operation routines +.RS +.TP +.B * +Fortran only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_int4_add\fP(3) +.TP +.B * +\fIshmem_int4_inc\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Remote memory pointer function +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_ptr\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Reduction routines +.RS +.TP +.B * +C/C++ only: +.RS +.TP +.B * +\fIshmem_longdouble_max_to_all\fP(3) +.TP +.B * +\fIshmem_longdouble_min_to_all\fP(3) 
+.TP +.B * +\fIshmem_longdouble_prod_to_all\fP(3) +.TP +.B * +\fIshmem_longdouble_sum_to_all\fP(3) +.RE +.RS +.PP +.RE +.TP +.B * +Fortran only: +.RS +.PP +.RS +.RE +.TP +.B * +\fIshmem_real16_max_to_all\fP(3) +.TP +.B * +\fIshmem_real16_min_to_all\fP(3) +.TP +.B * +\fIshmem_real16_prod_to_all\fP(3) +.TP +.B * +\fIshmem_real16_sum_to_all\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Accessibility query routines +.RS +.TP +.B * +C/C++ and Fortran: +.RS +.TP +.B * +\fIshmem_pe_accessible\fP(3) +.TP +.B * +\fIshmem_addr_accessible\fP(3) +.RE +.RS +.PP +.RE +.RE +.PP +.RE +.TP +Symmetric Data Objects +.PP +Consistent with the SPMD nature of the OpenSHMEM programming model is the +concept of symmetric data objects. These are arrays or variables that +exist with the same size, type, and relative address on all PEs. +Another term for symmetric data objects is "remotely accessible data objects". +In the interface definitions for OpenSHMEM data transfer routines, one or more of the +parameters are typically required to be symmetric or remotely accessible. +.PP +The following kinds of data objects are symmetric: +.RS +.TP +.B * +Fortran data objects in common blocks or with the SAVE attribute. These data +objects must not be defined in a dynamic shared object (DSO). +.TP +.B * +Non\-stack C and C++ variables. These data objects must not be defined in a DSO. +.TP +.B * +Fortran arrays allocated with \fIshpalloc\fP(3F) +.TP +.B * +C and C++ data allocated by \fIshmalloc\fP(3C) +.RE +.RS +.PP +.RE +.TP +Collective Routines +Some SHMEM routines, for example, \fIshmem_broadcast\fP(3) +and +\fIshmem_float_sum_to_all\fP(3), +are classified as collective routines +because they distribute work across a set of PEs. +They must be called concurrently by all PEs in the active set defined by the PE_start, +logPE_stride, PE_size argument triplet. 
The following man pages describe the OpenSHMEM +collective routines: +.RS +.TP +.B * +\fIshmem_and\fP(3) +.TP +.B * +\fIshmem_barrier\fP(3) +.TP +.B * +\fIshmem_broadcast\fP(3) +.TP +.B * +\fIshmem_collect\fP(3) +.TP +.B * +\fIshmem_max\fP(3) +.TP +.B * +\fIshmem_min\fP(3) +.TP +.B * +\fIshmem_or\fP(3) +.TP +.B * +\fIshmem_prod\fP(3) +.TP +.B * +\fIshmem_sum\fP(3) +.TP +.B * +\fIshmem_xor\fP(3) +.RE +.RS +.PP +.RE +.PP +.SH USING THE SYMMETRIC WORK ARRAY, PSYNC + +Multiple pSync arrays are often needed if a particular PE calls an OpenSHMEM collective +routine twice without an intervening barrier synchronization. Problems would occur if some PEs +in the active set for call 2 arrive at call 2 before processing of call 1 is complete by all PEs in +the call 1 active set. You can use \fIshmem_barrier\fP(3) +or \fIshmem_barrier_all\fP(3) +to perform a barrier synchronization between consecutive calls to OpenSHMEM collective +routines. +.PP +There are two special cases: +.RE +.TP +.B * +The \fIshmem_barrier\fP(3) routine allows the same pSync array to be used on +consecutive calls as long as the active PE set does not change. +.TP +.B * +If the same collective routine is called multiple times with the same active set, the +calls may alternate between two pSync arrays. The SHMEM routines guarantee that a +first call is completely finished by all PEs by the time processing of a third call begins +on any PE. +.PP +Because the SHMEM routines restore pSync to its original contents, multiple calls that +use the same pSync array do not require that pSync be reinitialized after the first call. +.PP +.SH SHMEM ENVIRONMENT VARIABLES + +This section lists the significant SHMEM environment variables. +.TP +.B * +\fBSMA_VERSION\fP print the library version at start\-up. +.TP +.B * +\fBSMA_INFO\fP print helpful text about all these environment variables. +.TP +.B * +\fBSMA_SYMMETRIC_SIZE\fP number of bytes to allocate for the symmetric heap. 
+.TP +.B * +\fBSMA_DEBUG\fP enable debugging messages. +.PP +The first call to SHMEM must be \fIstart_pes\fP(3)\&. +This routine initializes the SHMEM runtime. +.PP +Calling any other SHMEM routines beforehand has undefined behavior. Multiple calls +to this routine are not allowed. +.PP +.SH COMPILING AND RUNNING OPENSHMEM PROGRAMS + +The OpenSHMEM specification is silent regarding how OpenSHMEM programs are compiled, +linked and run. This section shows some examples of how wrapper programs could be utilized +to compile and launch applications. The commands are styled after wrapper programs +found in many MPI implementations. +.PP +The following sample command lines demonstrate compiling an OpenSHMEM program using wrapper scripts (\fBoshcc\fP and \fBoshfort\fP +in this case): +.PP +.TP +.B * +C/C++: +.Vb +oshcc c_program.c +.Ve +.TP +.B * +FORTRAN: +.Vb +oshfort fortran_program.f +.Ve +.PP +The following sample command line demonstrates running an OpenSHMEM program assuming that the library provides a wrapper script for that purpose +(named \fBoshrun\fP +for this example): +.PP +.Vb +oshrun \-np 32 ./a.out +.Ve +.PP +.SH EXAMPLES + +\fBExample 1\fP: +The following Fortran OpenSHMEM program directs all PEs to sum +simultaneously the numbers in the VALUES variable across all PEs: +.Vb +PROGRAM REDUCTION + REAL VALUES, SUM + COMMON /C/ VALUES + REAL WORK + + CALL START_PES(0) + VALUES = MY_PE() + CALL SHMEM_BARRIER_ALL ! Synchronize all PEs + SUM = 0.0 + DO I = 0, NUM_PES()\-1 + CALL SHMEM_REAL_GET(WORK, VALUES, 1, I) ! Get next value + SUM = SUM + WORK ! 
Sum it + ENDDO + PRINT *, 'PE ', MY_PE(), ' COMPUTED SUM=', SUM + CALL SHMEM_BARRIER_ALL +END +.Ve +\fBExample 2\fP: +The following C OpenSHMEM program transfers an array of 10 longs from +PE 0 to PE 1: +.Vb +#include <mpp/shmem.h> + +main() { + long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + static long target[10]; + + start_pes(0); + if (_my_pe() == 0) { + /* put 10 elements into target on PE 1 */ + shmem_long_put(target, source, 10, 1); + } + shmem_barrier_all(); /* sync sender and receiver */ + if (_my_pe() == 1) + printf("target[0] on PE %d is %ld\\n", _my_pe(), target[0]); +} +.Ve +.SH SEE ALSO + +The following man pages also contain information on OpenSHMEM routines. See the +specific man pages for implementation information. +.PP +\fIshmem_add\fP(3), +\fIshmem_and\fP(3), +\fIshmem_barrier\fP(3), +\fIshmem_barrier_all\fP(3), +\fIshmem_broadcast\fP(3), +\fIshmem_cache\fP(3), +\fIshmem_collect\fP(3), +\fIshmem_cswap\fP(3), +\fIshmem_fadd\fP(3), +\fIshmem_fence\fP(3), +\fIshmem_finc\fP(3), +\fIshmem_get\fP(3), +\fIshmem_iget\fP(3), +\fIshmem_inc\fP(3), +\fIshmem_iput\fP(3), +\fIshmem_lock\fP(3), +\fIshmem_max\fP(3), +\fIshmem_min\fP(3), +\fIshmem_my_pe\fP(3), +\fIshmem_or\fP(3), +\fIshmem_prod\fP(3), +\fIshmem_put\fP(3), +\fIshmem_quiet\fP(3), +\fIshmem_short_g\fP(3), +\fIshmem_short_p\fP(3), +\fIshmem_sum\fP(3), +\fIshmem_swap\fP(3), +\fIshmem_wait\fP(3), +\fIshmem_xor\fP(3), +\fIshmem_pe_accessible\fP(3), +\fIshmem_addr_accessible\fP(3), +\fIstart_pes\fP(3), +\fIshmalloc\fP(3C), +\fIshpalloc\fP(3F), +\fIMY_PE\fP(3I), +\fINUM_PES\fP(3I) diff --git a/oshmem/shmem/man/man3/shfree.3in b/oshmem/shmem/man/man3/shfree.3in new file mode 100644 index 0000000000..63a8ff4e8e --- /dev/null +++ b/oshmem/shmem/man/man3/shfree.3in @@ -0,0 +1 @@ +.so man3/shmalloc.3 diff --git a/oshmem/shmem/man/man3/shmalloc.3in b/oshmem/shmem/man/man3/shmalloc.3in new file mode 100644 index 0000000000..5255598fa9 --- /dev/null +++ b/oshmem/shmem/man/man3/shmalloc.3in @@ -0,0 +1,105 @@ +.\" -*- 
nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMALLOC" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmalloc\fP(3), +\fIshfree\fP(3), +\fIshmemalign\fP(3), +\fIshrealloc\fP(3) +\- Symmetric heap memory management functions. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void *shmalloc(size_t size); + +void shfree(void *ptr); + +void *shrealloc(void *ptr, size_t size); + +void *shmemalign(size_t alignment, size_t size); + +extern long malloc_error; +.Ve +.SH DESCRIPTION + +The \fBshmalloc\fP +function returns a pointer to a block of at least size bytes +suitably aligned for any use. This space is allocated from the symmetric heap (in contrast +to \fImalloc\fP(3C), +which allocates from the private heap). +.PP +The \fBshmemalign\fP +function allocates a block in the symmetric heap that has a +byte alignment specified by the alignment argument. +.PP +The \fBshfree\fP +function causes the block to which ptr points to be deallocated, +that is, made available for further allocation. If ptr is a null pointer, no action +occurs; otherwise, if the argument does not match a pointer earlier returned by a symmetric +heap function, or if the space has already been deallocated, malloc_error is set to indicate the +error, and shfree returns. +.PP +The \fBshrealloc\fP +function changes the size of the block to which ptr points to the +size (in bytes) specified by size. +.PP +The contents of the block are unchanged up to the lesser of the new and old sizes. If the new +size is larger, the value of the newly allocated portion of the block is indeterminate. If ptr is a +null pointer, the shrealloc function behaves like the shmalloc function for the specified size. If +size is 0 and ptr is not a null pointer, the block to which it points is freed. 
Otherwise, if ptr +does not match a pointer earlier returned by a symmetric heap function, or if the space has +already been deallocated, the malloc_error variable is set to indicate the error, and shrealloc +returns a null pointer. If the space cannot be allocated, the block to which ptr points is +unchanged. +.PP +The shmalloc, shfree, and shrealloc functions are provided so that multiple PEs in an +application can allocate symmetric, remotely accessible memory blocks. These memory +blocks can then be used with (shmem) communication routines. Each of these functions calls +the \fIshmem_barrier_all\fP(3) +function before returning; this ensures that all PEs +participate in the memory allocation, and that the memory on other PEs can be used as soon +as the local PE returns. +.PP +The user is responsible for calling these functions with identical argument(s) on all PEs; if +differing size arguments are used, subsequent calls may not return the same symmetric heap +address on all PEs. +.PP +.SH NOTES + +The total size of the symmetric heap is determined at job startup. One can adjust the size of +the heap using the SHMEM_SYMMETRIC_HEAP_SIZE environment variable. See the +\fIintro_shmem\fP(3) +man page for further details. +The shmalloc, shfree, and shrealloc functions differ from the private heap allocation functions +in that all PEs in an application must call them (a barrier is used to ensure this). +.PP +.SH RETURN VALUES + +The \fBshmalloc\fP +function returns a pointer to the allocated space (which should +be identical on all PEs); otherwise, it returns a null pointer (with malloc_error set). +The \fBshfree\fP +function returns no value. +The \fBshrealloc\fP +function returns a pointer to the allocated space (which +may have moved); otherwise, it returns a null pointer (with malloc_error set).
+.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fImy_pe\fP(3I), +\fIstart_pes\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_addr_accessible.3in b/oshmem/shmem/man/man3/shmem_addr_accessible.3in new file mode 100644 index 0000000000..bfc6e9d78e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_addr_accessible.3in @@ -0,0 +1,56 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_ADDR\\_ACCESSIBLE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +shmem_addr_accessible \- Indicates if an address is accessible via OpenSHMEM operations +from the specified remote PE. +.SH SYNOPSIS + +C or C++: +.Vb +#include + +int shmem_addr_accessible(void *addr, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +LOGICAL LOG, SHMEM_ADDR_ACCESSIBLE +INTEGER pe + +LOG = SHMEM_ADDR_ACCESSIBLE(addr, pe) +.Ve +.SH DESCRIPTION + +shmem_addr_accessible is a query function that indicates whether a local address is +accessible via SHMEM operations from the specified remote PE. +.PP +This function verifies that the remote PE is accessible via SHMEM data transfer functions from +the local PE, and that the specified address is in a symmetric data segment with respect to the +remote PE. +.PP +.SH RETURN VALUES + +C: The return value is 1 if addr is a symmetric data object and accessible via SHMEM +operations from the specified remote PE; otherwise, it is 0. +.PP +Fortran: The return value is \&.TRUE. if addr is a symmetric data object and accessible via +SHMEM operations from the specified remote PE; otherwise, it is \&.FALSE.. 
+.PP +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_pe_accessible\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_barrier.3in b/oshmem/shmem/man/man3/shmem_barrier.3in new file mode 100644 index 0000000000..e9e9722a04 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_barrier.3in @@ -0,0 +1,112 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_BARRIER" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +shmem_barrier \- Performs a barrier operation on a subset of processing elements (PEs). +.SH SYNOPSIS + +C or C++: +.Vb +#include + +void shmem_barrier(int PE_start, int logPE_stride, int PE_size, + long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER PE_start, logPE_stride, PE_size +INTEGER pSync(SHMEM_BARRIER_SYNC_SIZE) + +CALL SHMEM_BARRIER(PE_start, logPE_stride, PE_size, pSync) +.Ve +.SH DESCRIPTION + +The shmem_barrier routine does not return until the subset of PEs specified by +\fBPE_start\fP, +\fBlogPE_stride\fP +and \fBPE_size\fP, +has entered this routine at the +same point of the execution path. +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The arguments are as follows: +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. 
If you +are using Fortran, it must be a default integer value. +.TP +pSync +A symmetric work array. In C/C++, pSync must be of type long and size +_SHMEM_BARRIER_SYNC_SIZE. In Fortran, pSync must be of type integer and size +SHMEM_BARRIER_SYNC_SIZE. If you are using Fortran, it must be a default integer type. +Every element of this array must be initialized to 0 before any of the PEs in the active set enter +shmem_barrier the first time. +.PP +The values of arguments PE_start, logPE_stride, and PE_size must be equal on all PEs in the +active set. The same work array must be passed in pSync to all PEs in the active set. +.PP +shmem_barrier ensures that all previously issued local stores and previously issued remote +memory updates done by any of the PEs in the active set (by using SHMEM calls, for +example \fIshmem_put\fP(3)) +are complete before returning. +.PP +The same pSync array may be reused on consecutive calls to shmem_barrier if the same +active PE set is used. +.PP +.SH NOTES + +The term symmetric is defined in \fIintro_shmem\fP(3)\&. +.PP +If the pSync array is initialized at run time, be sure to use some type of synchronization, for +example, a call to \fIshmem_barrier_all\fP(3), +before calling shmem_barrier for the first +time. +.PP +If the active set does not change, shmem_barrier can be called repeatedly with the same +pSync array. No additional synchronization beyond that implied by shmem_barrier itself is +necessary in this case.
+.PP +.SH EXAMPLES + +C/C++ example: +.Vb +shmem_barrier(PE_start, logPE_stride, PE_size, pSync); +.Ve +Fortran example: +.Vb +INTEGER PSYNC(SHMEM_BARRIER_SYNC_SIZE) +INTEGER PE_START, LOGPE_STRIDE, PE_SIZE, PSYNC +DATA PSYNC /SHMEM_BARRIER_SYNC_SIZE*0/ + +CALL SHMEM_BARRIER(PE_START, LOGPE_STRIDE, PE_SIZE, PSYNC) +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_barrier_all\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_barrier_all.3in b/oshmem/shmem/man/man3/shmem_barrier_all.3in new file mode 100644 index 0000000000..df94380809 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_barrier_all.3in @@ -0,0 +1,59 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_BARRIER\\_ALL" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +shmem_barrier_all \- Suspends the execution of the calling PE until all other PEs issue a call +to this particular shmem_barrier_all() statement. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_barrier_all(void); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +CALL SHMEM_BARRIER_ALL +.Ve +.SH DESCRIPTION + +The shmem_barrier_all routine does not return until all other PEs have entered this routine +at the same point of the execution path. +.PP +Prior to synchronizing with other PEs, shmem_barrier_all ensures completion of all +previously issued local memory stores and remote memory updates issued via SHMEM +functions such as \fIshmem_put32\fP(3)\&. +.PP +.SH EXAMPLES + +.Vb +setup_data() +{ + if (_my_pe() == 0) { + setup(); + } + + /* All PEs wait for PE 0 to complete setup().
*/ + shmem_barrier_all(); +} +.Ve +.PP +.SH SEE ALSO + +\fIshmem_barrier\fP(3), +\fIstart_pes\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_broadcast32.3in b/oshmem/shmem/man/man3/shmem_broadcast32.3in new file mode 100644 index 0000000000..abb38e7952 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_broadcast32.3in @@ -0,0 +1,186 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_BROADCAST" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_broadcast4\fP(3), +\fIshmem_broadcast8\fP(3), +\fIshmem_broadcast32\fP(3), +\fIshmem_broadcast64\fP(3) +\- Copy a data object from a designated PE to a target +location on all other PEs of the active set. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_broadcast32(void *target, const void *source, + size_t nelems, int PE_root, int PE_start, int logPE_stride, + int PE_size, long *pSync); + +void shmem_broadcast64(void *target, const void *source, + size_t nelems, int PE_root, int PE_start, int logPE_stride, + int PE_size, long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER nelems, PE_root, PE_start, logPE_stride, PE_size +INTEGER pSync(SHMEM_BCAST_SYNC_SIZE) + +CALL SHMEM_BROADCAST4(target, source, nelems, PE_root, +& PE_start, logPE_stride, PE_size, pSync) + +CALL SHMEM_BROADCAST8(target, source, nelems, PE_root, +& PE_start, logPE_stride, PE_size, pSync) + +CALL SHMEM_BROADCAST32(target, source, nelems, +& PE_root, PE_start, logPE_stride, PE_size, pSync) + +CALL SHMEM_BROADCAST64(target, source, nelems, +& PE_root, PE_start, logPE_stride, PE_size, pSync) +.Ve +.SH DESCRIPTION + +The broadcast routines write the data at address source of the PE specified by +\fBPE_root\fP +to address \fBtarget\fP +on all other PEs in the active set.
The active set of +PEs is defined by the triplet \fBPE_start\fP, +\fBlogPE_stride\fP +and \fBPE_size\fP\&. +The data is not copied to the target address on the PE specified by \fBPE_root\fP\&. +Before returning, the broadcast routines ensure that the elements of the pSync array are +restored to their initial values. +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The arguments are as follows: +.TP +target +A symmetric data object with one of the following data types: +.RS +.TP +\fBshmem_broadcast8, shmem_broadcast64\fP: Any noncharacter type that +has an element size of 64 bits. No Fortran derived types or C/C++ structures are allowed. +.TP +\fBshmem_broadcast32\fP: Any noncharacter type that has an element size +of 32 bits. No Fortran derived types or C/C++ structures are allowed. +.TP +\fBshmem_broadcast4\fP: Any noncharacter type that has an element size +of 32 bits. +.RE +.RS +.PP +.RE +.TP +source +A symmetric data object that can be of any data type that is permissible for the +target argument. +.TP +nelems +The number of elements in source. For shmem_broadcast32 and +shmem_broadcast4, this is the number of 32\-bit halfwords. nelems must be of type integer. +If you are using Fortran, it must be a default integer value. +.TP +PE_root +Zero\-based ordinal of the PE, with respect to the active set, from which the +data is copied. Must be greater than or equal to 0 and less than PE_size. PE_root must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. 
logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.PP +.TP +pSync +A symmetric work array. In C/C++, pSync must be of type long and size +_SHMEM_BCAST_SYNC_SIZE. +In Fortran, pSync must be of type integer and size SHMEM_BCAST_SYNC_SIZE. Every +element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++) +or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter +shmem_barrier(). +.PP +The values of arguments PE_root, PE_start, logPE_stride, and PE_size must be equal on +all PEs in the active set. The same target and source data objects and the same pSync work +array must be passed to all PEs in the active set. +.PP +Before any PE calls a broadcast routine, you must ensure that the following conditions exist +(synchronization via a barrier or some other method is often needed to ensure this): The +pSync array on all PEs in the active set is not still in use from a prior call to a broadcast +routine. The target array on all PEs in the active set is ready to accept the broadcast data. +.PP +Upon return from a broadcast routine, the following are true for the local PE: If the current PE +is not the root PE, the target data object is updated. The values in the pSync array are +restored to the original values. +.SH NOTES + +The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&. +.PP +All SHMEM broadcast routines restore pSync to its original contents. Multiple calls to SHMEM +routines that use the same pSync array do not require that pSync be reinitialized after the +first call. +.PP +You must ensure that the pSync array is not being updated by any PE in the active set +while any of the PEs participates in processing of a SHMEM broadcast routine.
Be careful to +avoid these situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized pSync +before any of them enter a SHMEM routine called with the pSync synchronization array. A +pSync array may be reused on a subsequent SHMEM broadcast routine only if none of the PEs +in the active set are still processing a prior SHMEM broadcast routine call that used the same +pSync array. In general, this can be ensured only by doing some type of synchronization. +However, in the special case of SHMEM routines being called with the same active set, you +can allocate two pSync arrays and alternate between them on successive calls. +.PP +.SH EXAMPLES + +In the following examples, the call to shmem_broadcast64 copies source on PE 4 to target +on PEs 5, 6, and 7. +.PP +C/C++ example: +.Vb +for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) { + pSync[i] = _SHMEM_SYNC_VALUE; +} +shmem_barrier_all(); /* Wait for all PEs to initialize pSync */ +shmem_broadcast64(target, source, nelems, 0, 4, 0, 4, pSync); +.Ve +Fortran example: +.Vb +INTEGER PSYNC(SHMEM_BCAST_SYNC_SIZE) +INTEGER TARGET, SOURCE, NELEMS, PE_ROOT, PE_START, +& LOGPE_STRIDE, PE_SIZE, PSYNC +COMMON /COM/ TARGET, SOURCE +DATA PSYNC /SHMEM_BCAST_SYNC_SIZE*SHMEM_SYNC_VALUE/ + +CALL SHMEM_BROADCAST64(TARGET, SOURCE, NELEMS, 0, 4, 0, 4, +& PSYNC) +.Ve +.PP +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_broadcast64.3in b/oshmem/shmem/man/man3/shmem_broadcast64.3in new file mode 100644 index 0000000000..b9255d5020 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_broadcast64.3in @@ -0,0 +1 @@ +.so man3/shmem_broadcast32.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_char_g.3in b/oshmem/shmem/man/man3/shmem_char_g.3in new file mode 100644 index 0000000000..722a79c640 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_char_g.3in @@ -0,0 +1,64 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 
University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_CHAR\\_G" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_char_g\fP(3), +\fIshmem_float_g\fP(3), +\fIshmem_int_g\fP(3), +\fIshmem_long_g\fP(3), +\fIshmem_short_g\fP(3), +\fIshmem_longlong_g\fP(3), +\fIshmem_longdouble_g\fP(3) +\- These routines provide a low latency mechanism to read basic types (char, short, int, float, double, long, long long, long double) from symmetric data objects on remote PEs. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + + +char shmem_char_g(char *addr, int pe); + +short shmem_short_g(short *addr, int pe); + +int shmem_int_g(int *addr, int pe); + +long shmem_long_g(long *addr, int pe); + +long long shmem_longlong_g(long long *addr, int pe); + +float shmem_float_g(float *addr, int pe); + +double shmem_double_g(double *addr, int pe); + +long double shmem_longdouble_g(long double *addr, int pe); + +.Ve +.SH DESCRIPTION + +These routines provide a very low latency get capability for single elements of most basic types. +.PP +The arguments are as follows: +.TP +addr +The remotely accessible array element or scalar data object on the remote PE from +which the data is read. +.TP +pe +The number of the remote PE. +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_get\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_char_get.3in b/oshmem/shmem/man/man3/shmem_char_get.3in new file mode 100644 index 0000000000..8091004920 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_char_get.3in @@ -0,0 +1,207 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +..
+.TH "SHMEM\\_GET" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_character_get\fP(3), +\fIshmem_complex_get\fP(3), +\fIshmem_double_get\fP(3), +\fIshmem_float_get\fP(3), +\fIshmem_get4\fP(3), +\fIshmem_get8\fP(3), +\fIshmem_get32\fP(3), +\fIshmem_get64\fP(3), +\fIshmem_get128\fP(3), +\fIshmem_getmem\fP(3), +\fIshmem_int_get\fP(3), +\fIshmem_integer_get\fP(3), +\fIshmem_logical_get\fP(3), +\fIshmem_long_get\fP(3), +\fIshmem_longdouble_get\fP(3), +\fIshmem_longlong_get\fP(3), +\fIshmem_real_get\fP(3), +\fIshmem_short_get\fP(3) +\- Transfers data from a specified processing element (PE). +.SH SYNOPSIS + +C or C++: +.Vb +#include + +void shmem_get32(void *target, const void *source, + size_t len, int pe); + +void shmem_get64(void *target, const void *source, + size_t len, int pe); + +void shmem_get128(void *target, const void *source, + size_t len, int pe); + +void shmem_getmem(void *target, const void *source, + size_t len, int pe); + +void shmem_int_get(int *target, const int *source, + size_t len, int pe); + +void shmem_double_get(double *target, const double *source, + size_t len, int pe); + +void shmem_float_get(float *target, const float *source, + size_t len, int pe); + +void shmem_long_get(long *target, const long *source, + size_t len, int pe); + +void shmem_longdouble_get(long double *target, + const long double *source, size_t len, int pe); + +void shmem_longlong_get(long long *target, + const long long *source, size_t len, int pe); + +void shmem_short_get(short *target, + const short *source, size_t len, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER len, pe + +CALL SHMEM_CHARACTER_GET(target, source, len, pe) + +CALL SHMEM_COMPLEX_GET(target, source, len, pe) + +CALL SHMEM_DOUBLE_GET(target, source, len, pe) + +CALL SHMEM_GET4(target, source, len, pe) + +CALL SHMEM_GET8(target, source, len, pe) + +CALL SHMEM_GET32(target, source, len, pe) + +CALL SHMEM_GET64(target, source, len, pe) + +CALL 
SHMEM_GET128(target, source, len, pe) + +CALL SHMEM_GETMEM(target, source, len, pe) + +CALL SHMEM_INTEGER_GET(target, source, len, pe) + +CALL SHMEM_LOGICAL_GET(target, source, len, pe) + +CALL SHMEM_REAL_GET(target, source, len, pe) +.Ve +.SH DESCRIPTION + +The shmem_get routines transfer \fBlen\fP +elements of the data object at address \fBsource\fP +on the remote PE \fBpe\fP, +to the data object at address \fBtarget\fP +on the local PE. These routines +return after the data has been copied to address \fBtarget\fP +on the local PE. +.PP +The arguments are as follows: +.TP +target +Local data object to be updated. +.TP +source +Data object on the PE identified by pe that contains the data to be copied. This +data object must be remotely accessible. +.TP +len +Number of elements in the target and source arrays. len must be of type integer. If +you are using Fortran, it must be a constant, variable, or array element of default +integer type. +.TP +pe +PE number of the remote PE. pe must be of type integer. If you are using Fortran, it +must be a constant, variable, or array element of default integer type. +.PP +The target and source data objects must conform to typing constraints, which are as follows: +.TP +\fBshmem_getmem\fP: Fortran: Any noncharacter type. C: Any data type. len is +scaled in bytes. +.TP +\fBshmem_get4, shmem_get32\fP: Any noncharacter type that has a storage size +equal to 32 bits. +.TP +\fBshmem_get8, shmem_get64\fP: Any noncharacter type that has a storage size equal to +64 bits. +.TP +\fBshmem_get128\fP: Any noncharacter type that has a storage size equal to 128 +bits. +.TP +\fBshmem_short_get\fP: Elements of type short. +.TP +\fBshmem_int_get\fP: Elements of type int. +.TP +\fBshmem_long_get\fP: Elements of type long. +.TP +\fBshmem_longlong_get\fP: Elements of type long long. +.TP +\fBshmem_float_get\fP: Elements of type float. +.TP +\fBshmem_double_get\fP: Elements of type double.
+.TP +\fBshmem_longdouble_get\fP: Elements of type long double. +.TP +\fBSHMEM_CHARACTER_GET\fP: Elements of type character. len is the number of +characters to transfer. The actual character lengths of the source and target variables are +ignored. +.TP +\fBSHMEM_COMPLEX_GET\fP: Elements of type complex of default size. +.TP +\fBSHMEM_DOUBLE_GET\fP: (Fortran) Elements of type double precision. +.TP +\fBSHMEM_INTEGER_GET\fP: Elements of type integer. +.TP +\fBSHMEM_LOGICAL_GET\fP: Elements of type logical. +.TP +\fBSHMEM_REAL_GET\fP: Elements of type real. +.PP +If you are using Fortran, data types must be of default size. For example, a real variable must +be declared as REAL, REAL*4, or REAL(KIND=4). +.SH NOTES + +See \fIintro_shmem\fP(3) +for a definition of the term remotely accessible. +.SH EXAMPLES + +Consider this simple example for Fortran. +.Vb +PROGRAM REDUCTION + REAL VALUES, SUM + COMMON /C/ VALUES + REAL WORK + + CALL START_PES(0) ! ALLOW ANY NUMBER OF PES + VALUES = MY_PE() ! INITIALIZE IT TO SOMETHING + CALL SHMEM_BARRIER_ALL + SUM = 0.0 + DO I = 0,NUM_PES()\-1 + CALL SHMEM_REAL_GET(WORK, VALUES, 1, I) + SUM = SUM + WORK + ENDDO + PRINT *, 'PE ', MY_PE(), ' COMPUTED SUM=', SUM + CALL SHMEM_BARRIER_ALL +END +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_put\fP(3), +\fIshmem_iget\fP(3), +\fIshmem_quiet\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_char_p.3in b/oshmem/shmem/man/man3/shmem_char_p.3in new file mode 100644 index 0000000000..3d122b2d9c --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_char_p.3in @@ -0,0 +1,73 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_CHAR\\_P" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_char_p\fP(3), +\fIshmem_float_p\fP(3), +\fIshmem_int_p\fP(3), +\fIshmem_long_p\fP(3), +\fIshmem_short_p\fP(3), +\fIshmem_longlong_p\fP(3), +\fIshmem_longdouble_p\fP(3) +\- These routines provide a low latency mechanism to write basic types (char, short, int, float, double, long, long long, long double) to symmetric data objects on remote PEs. +.SH SYNOPSIS + +C or C++: +.Vb +#include + + +void shmem_char_p(char *addr, char value, int pe); + +void shmem_short_p(short *addr, short value, int pe); + +void shmem_int_p(int *addr, int value, int pe); + +void shmem_long_p(long *addr, long value, int pe); + +void shmem_longlong_p(long long *addr, long long value, int pe); + +void shmem_float_p(float *addr, float value, int pe); + +void shmem_double_p(double *addr, double value, int pe); + +void shmem_longdouble_p(long double *addr, long double value, int pe); + +.Ve +.SH DESCRIPTION + +These routines provide a very low latency put capability for single elements of most basic types. +.PP +The arguments are as follows: +.TP +addr +The remotely accessible array element or scalar data object which will receive the +data on the remote PE. +.TP +value +The value to be transferred to addr on the remote PE. +.TP +pe +The number of the remote PE. +.PP +As with \fIshmem_put\fP(3), +these functions start the remote transfer and may return before +the data is delivered to the remote PE. Use \fIshmem_quiet\fP(3) +to force completion of all +remote PUT transfers. +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_put\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_char_put.3in b/oshmem/shmem/man/man3/shmem_char_put.3in new file mode 100644 index 0000000000..2ba020522e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_char_put.3in @@ -0,0 +1,214 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. 
+.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_PUT" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_character_put\fP(3), +\fIshmem_complex_put\fP(3), +\fIshmem_double_put\fP(3), +\fIshmem_float_put\fP(3), +\fIshmem_int_put\fP(3), +\fIshmem_integer_put\fP(3), +\fIshmem_logical_put\fP(3), +\fIshmem_long_put\fP(3), +\fIshmem_longdouble_put\fP(3), +\fIshmem_longlong_put\fP(3), +\fIshmem_put4\fP(3), +\fIshmem_put8\fP(3), +\fIshmem_put32\fP(3), +\fIshmem_put64\fP(3), +\fIshmem_put128\fP(3), +\fIshmem_putmem\fP(3), +\fIshmem_real_put\fP(3), +\fIshmem_short_put\fP(3) +\- Transfers data to a specified +processing element (PE) +.SH SYNOPSIS + +C or C++: +.Vb +#include + +void shmem_double_put(double *target, const double *source, + size_t len, int pe); + +void shmem_float_put(float *target, const float *source, + size_t len, int pe); + +void shmem_int_put(int *target, const int *source, size_t len, + int pe); + +void shmem_long_put(long *target, const long *source, + size_t len, int pe); + +void shmem_longdouble_put(long double *target, + const long double *source, size_t len, int pe); + +void shmem_longlong_put(long long *target, + const long long *source, size_t len, int pe); + +void shmem_put32(void *target, const void *source, size_t len, + int pe); + +void shmem_put64(void *target, const void *source, size_t len, + int pe); + +void shmem_put128(void *target, const void *source, size_t len, + int pe); + +void shmem_putmem(void *target, const void *source, size_t len, + int pe); + +void shmem_short_put(short *target, const short *source, + size_t len, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER len, pe + +CALL SHMEM_CHARACTER_PUT(target, source, len, pe) + +CALL SHMEM_COMPLEX_PUT(target, source, len, pe) + +CALL SHMEM_DOUBLE_PUT(target, source, len, pe) + +CALL SHMEM_INTEGER_PUT(target, source, len, pe) + +CALL SHMEM_LOGICAL_PUT(target, source, len, pe) + +CALL SHMEM_PUT(target, source, 
len, pe) + +CALL SHMEM_PUT4(target, source, len, pe) + +CALL SHMEM_PUT8(target, source, len, pe) + +CALL SHMEM_PUT32(target, source, len, pe) + +CALL SHMEM_PUT64(target, source, len, pe) + +CALL SHMEM_PUT128(target, source, len, pe) + +CALL SHMEM_PUTMEM(target, source, len, pe) + +CALL SHMEM_REAL_PUT(target, source, len, pe) +.Ve +.SH DESCRIPTION + +These routines transfer \fBlen\fP +elements of the data object at address +\fBsource\fP +on the calling PE, to the data object at address \fBtarget\fP +on the remote +PE \fBpe\fP\&. +These routines start the remote transfer and may return before the data is +delivered to the remote PE. +.PP +The delivery of data into the data object on the destination PE from different put calls may +occur in any order. Because of this, two successive put operations may deliver data out of +order unless a call to \fIshmem_fence\fP(3) +is introduced between the two calls. +.PP +The arguments are as follows: +.TP +target +Data object to be updated on the remote PE. This data object must be remotely +accessible. +.TP +source +Data object containing the data to be copied. +.TP +len +Number of elements in the target and source arrays. len must be of type integer. If +you are using Fortran, it must be a constant, variable, or array element of default integer +type. +.TP +pe +PE number of the remote PE. pe must be of type integer. If you are using Fortran, it +must be a constant, variable, or array element of default integer type. +.PP +The target and source data objects must conform to certain typing constraints, which are as +follows: +.TP +\fBshmem_putmem\fP: Fortran: Any noncharacter type. C: Any data type. len is scaled in +bytes. +.TP +\fBshmem_put4, shmem_put32:\fP Any noncharacter type that has a storage size +equal to 32 bits. +.TP +\fBshmem_put8, shmem_put64:\fP Any noncharacter type that has a storage size +equal to 64 bits. +.TP +\fBshmem_put128:\fP Any noncharacter type that has a storage size equal to 128 +bits.
+.TP +\fBshmem_short_put:\fP Elements of type short. +.TP +\fBshmem_int_put:\fP Elements of type int. +.TP +\fBshmem_long_put:\fP Elements of type long. +.TP +\fBshmem_longlong_put:\fP Elements of type long long. +.TP +\fBshmem_float_put:\fP Elements of type float. +.TP +\fBshmem_double_put:\fP Elements of type double. +.TP +\fBshmem_longdouble_put:\fP Elements of type long double. +.TP +\fBSHMEM_CHARACTER_PUT:\fP Elements of type character. len is the number of +characters to transfer. The actual character lengths of the source and target variables are +ignored. +.TP +\fBSHMEM_COMPLEX_PUT:\fP Elements of type complex of default size. +.TP +\fBSHMEM_DOUBLE_PUT:\fP (Fortran) Elements of type double precision. +.TP +\fBSHMEM_INTEGER_PUT:\fP Elements of type integer. +.TP +\fBSHMEM_LOGICAL_PUT:\fP Elements of type logical. +.TP +\fBSHMEM_REAL_PUT:\fP Elements of type real. +If you are using Fortran, data types must be of default size. For example, a real variable must +be declared as REAL, REAL*4, or REAL(KIND=4). 
+.PP +.SH EXAMPLES + +The following shmem_put example is for C/C++ programs: +.Vb +#include +#include + +main() +{ + long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + static long target[10]; + start_pes(2); + + if (_my_pe() == 0) { + /* put 10 words into target on PE 1 */ + shmem_long_put(target, source, 10, 1); + } + shmem_barrier_all(); /* sync sender and receiver */ + if (_my_pe() == 1) + shmem_udcflush(); /* not required on Altix systems */ + printf("target[0] on PE %d is %d\\n", _my_pe(), target[0]); +} +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_iput\fP(3), +\fIshmem_quiet\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_clear_cache_inv.3in b/oshmem/shmem/man/man3/shmem_clear_cache_inv.3in new file mode 100644 index 0000000000..4a6a361ef9 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_clear_cache_inv.3in @@ -0,0 +1 @@ +.so man3/shmem_udcflush.3 diff --git a/oshmem/shmem/man/man3/shmem_clear_cache_line_inv.3in b/oshmem/shmem/man/man3/shmem_clear_cache_line_inv.3in new file mode 100644 index 0000000000..4a6a361ef9 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_clear_cache_line_inv.3in @@ -0,0 +1 @@ +.so man3/shmem_udcflush.3 diff --git a/oshmem/shmem/man/man3/shmem_clear_lock.3in b/oshmem/shmem/man/man3/shmem_clear_lock.3in new file mode 100644 index 0000000000..49974c4f17 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_clear_lock.3in @@ -0,0 +1 @@ +.so man3/shmem_set_lock.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_collect32.3in b/oshmem/shmem/man/man3/shmem_collect32.3in new file mode 100644 index 0000000000..bce6dc5aa0 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_collect32.3in @@ -0,0 +1,197 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_COLLECT" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_collect4\fP(3), +\fIshmem_collect8\fP(3), +\fIshmem_collect32\fP(3), +\fIshmem_collect64\fP(3), +\fIshmem_fcollect\fP(3), +\fIshmem_fcollect4\fP(3), +\fIshmem_fcollect8\fP(3), +\fIshmem_fcollect32\fP(3), +\fIshmem_fcollect64\fP(3) +\- Concatenates blocks of data from multiple processing elements (PEs) to an array in every PE +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_collect32(void *target, const void *source, + size_t nelems, int PE_start, int logPE_stride, int PE_size, + long *pSync); + +void shmem_collect64(void *target, const void *source, + size_t nelems, int PE_start, int logPE_stride, int PE_size, + long *pSync); + +void shmem_fcollect32(void *target, const void *source, + size_t nelems, int PE_start, int logPE_stride, int PE_size, + long *pSync); + +void shmem_fcollect64(void *target, const void *source, + size_t nelems, int PE_start, int logPE_stride, int PE_size, + long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER nelems +INTEGER PE_start, logPE_stride, PE_size +INTEGER pSync(SHMEM_COLLECT_SYNC_SIZE) + +CALL SHMEM_COLLECT4(target, source, nelems, PE_start, +& logPE_stride, PE_size, pSync) + +CALL SHMEM_COLLECT8(target, source, nelems, PE_start, +& logPE_stride, PE_size, pSync) + +CALL SHMEM_FCOLLECT4(target, source, nelems, PE_start, +& logPE_stride, PE_size, pSync) + +CALL SHMEM_FCOLLECT8(target, source, nelems, PE_start, +& logPE_stride, PE_size, pSync) +.Ve +.SH DESCRIPTION + +The shared memory (SHMEM) collect and fcollect routines concatenate nelems 64\-bit or 32\-bit +data items from the source array into the target array, over the set of PEs defined by +PE_start, logPE_stride, and PE_size, in processor number order. The resultant target array +contains the contribution from PE PE_start first, then the contribution from PE PE_start + +PE_stride second, and so on.
The collected result is written to the target array for all PEs in +the active set. +.PP +The fcollect routines require that nelems be the same value in all participating PEs, while the +collect routines allow nelems to vary from PE to PE. +.PP +The resulting target array is as follows: +.Vb +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + source(1..nelems) + from PE (PE_start + 0 * (2**logPE_stride)) +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + source(1..nelems) + from PE (PE_start + 1 * (2**logPE_stride)) +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + ... +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + source(1..nelems) from + PE (PE_start + (PE_size \- 1) * (2**logPE_stride)) +\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- +.Ve +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in +the active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The arguments are as follows: +.TP +target +A symmetric array. The target argument must be large enough to accept the concatenation of the source arrays on all PEs. The data types are +as follows: +.RS +.TP +[shmem_collect8, shmem_collect64, shmem_fcollect8, and +shmem_fcollect64] any data type with an element size of 64 bits. Fortran derived types, +Fortran character type, and C/C++ structures are not permitted. +.TP +[shmem_collect4, shmem_collect32, shmem_fcollect4, and +shmem_fcollect32] any data type with an element size of 32 bits. Fortran derived types, +Fortran character type, and C/C++ structures are not permitted. 
+.RE +.RS +.PP +.RE +.TP +source +A symmetric data object that can be of any type permissible for the target +argument. +.TP +nelems +The number of elements in the source array. nelems must be of type integer. If +you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.TP +pSync +A symmetric work array. In C/C++, pSync must be of type long and size +_SHMEM_COLLECT_SYNC_SIZE. In Fortran, pSync must be of type integer and size +SHMEM_COLLECT_SYNC_SIZE. If you are using Fortran, it must be a default integer value. +Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE in +C/C++ or SHMEM_SYNC_VALUE in Fortran before any of the PEs in the active set enter +the collect or fcollect routine. +.PP +The values of arguments PE_start, logPE_stride, and PE_size must be equal on all PEs in +the active set. The same target and source arrays and the same pSync work array must be +passed to all PEs in the active set. +.PP +Upon return from a collective routine, the following are true for the local PE: The target array +is updated. The values in the pSync array are restored to the original values. +.SH NOTES + +The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&. +All SHMEM collective routines reset the values in pSync before they return, so a particular +pSync buffer need only be initialized the first time it is used.
+.PP +You must ensure that the pSync array is not being updated on any PE in the active set while +any of the PEs participate in processing of a SHMEM collective routine. Be careful to +avoid these situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized +pSync before any of them enter a SHMEM routine called with the pSync synchronization array. +A pSync array can be reused on a subsequent SHMEM collective routine only if none +of the PEs in the active set are still processing a prior SHMEM collective routine call that used +the same pSync array. In general, this may be ensured only by doing some type of +synchronization. However, in the special case of SHMEM routines being called with the same +active set, you can allocate two pSync arrays and alternate between them on +successive calls. +.PP +The collective routines operate on active PE sets that have a non\-power\-of\-two PE_size +with some performance degradation. They operate with no performance degradation +when nelems is a non\-power\-of\-two value. 
+.SH EXAMPLES + +C/C++: +.Vb +for (i=0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) { + pSync[i] = _SHMEM_SYNC_VALUE; +} +shmem_barrier_all(); /* Wait for all PEs to initialize pSync */ +shmem_collect32(target, source, 64, pe_start, logPE_stride, + pe_size, pSync); +.Ve +Fortran: +.Vb +INTEGER PSYNC(SHMEM_COLLECT_SYNC_SIZE) +DATA PSYNC /SHMEM_COLLECT_SYNC_SIZE*SHMEM_SYNC_VALUE/ + +CALL SHMEM_COLLECT4(TARGET, SOURCE, 64, PE_START, +& LOGPE_STRIDE, PE_SIZE, PSYNC) +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_collect64.3in b/oshmem/shmem/man/man3/shmem_collect64.3in new file mode 100644 index 0000000000..17caf5bf98 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_collect64.3in @@ -0,0 +1 @@ +.so man3/shmem_collect32.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_complexd_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_complexd_prod_to_all.3in new file mode 100644 index 0000000000..39b196d082 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_complexd_prod_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_prod_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_complexd_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_complexd_sum_to_all.3in new file mode 100644 index 0000000000..f75a494841 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_complexd_sum_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_sum_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_complexf_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_complexf_prod_to_all.3in new file mode 100644 index 0000000000..39b196d082 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_complexf_prod_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_prod_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_complexf_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_complexf_sum_to_all.3in new file mode 100644 index 0000000000..f75a494841 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_complexf_sum_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_sum_to_all.3 diff --git 
a/oshmem/shmem/man/man3/shmem_double_g.3in b/oshmem/shmem/man/man3/shmem_double_g.3in new file mode 100644 index 0000000000..d2bbc4ad8d --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_g.3in @@ -0,0 +1 @@ +.so man3/shmem_char_g.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_double_get.3in b/oshmem/shmem/man/man3/shmem_double_get.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_get.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_double_iget.3in b/oshmem/shmem/man/man3/shmem_double_iget.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_iget.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_double_iput.3in b/oshmem/shmem/man/man3/shmem_double_iput.3in new file mode 100644 index 0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_iput.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_double_max_to_all.3in b/oshmem/shmem/man/man3/shmem_double_max_to_all.3in new file mode 100644 index 0000000000..e4ad3901e8 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_max_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_max_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_double_min_to_all.3in b/oshmem/shmem/man/man3/shmem_double_min_to_all.3in new file mode 100644 index 0000000000..d688221529 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_min_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_min_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_double_p.3in b/oshmem/shmem/man/man3/shmem_double_p.3in new file mode 100644 index 0000000000..c08d60a543 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_p.3in @@ -0,0 +1 @@ +.so man3/shmem_char_p.3 \ No newline at end of file diff 
--git a/oshmem/shmem/man/man3/shmem_double_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_double_prod_to_all.3in new file mode 100644 index 0000000000..39b196d082 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_prod_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_prod_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_double_put.3in b/oshmem/shmem/man/man3/shmem_double_put.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_put.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_double_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_double_sum_to_all.3in new file mode 100644 index 0000000000..f75a494841 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_sum_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_sum_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_double_swap.3in b/oshmem/shmem/man/man3/shmem_double_swap.3in new file mode 100644 index 0000000000..a038a3f31e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_double_swap.3in @@ -0,0 +1 @@ +.so man3/shmem_swap.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_fcollect32.3in b/oshmem/shmem/man/man3/shmem_fcollect32.3in new file mode 100644 index 0000000000..17caf5bf98 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_fcollect32.3in @@ -0,0 +1 @@ +.so man3/shmem_collect32.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_fcollect64.3in b/oshmem/shmem/man/man3/shmem_fcollect64.3in new file mode 100644 index 0000000000..17caf5bf98 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_fcollect64.3in @@ -0,0 +1 @@ +.so man3/shmem_collect32.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_fence.3in b/oshmem/shmem/man/man3/shmem_fence.3in new file mode 100644 index 0000000000..12e97a55dc --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_fence.3in @@ -0,0 +1,54 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. 
All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_FENCE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +shmem_fence \- Provides a separate ordering on the sequence of puts issued by this PE to each destination +PE. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_fence(void); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +CALL SHMEM_FENCE +.Ve +.SH DESCRIPTION + +The \fBshmem_fence()\fP +routine provides an ordering on the put operations issued by the calling +PE prior to the call to \fBshmem_fence()\fP +relative to the put operations issued by the +calling PE following the call to \fBshmem_fence()\fP\&. +It guarantees that all such prior put operations +issued to a particular destination PE are fully written to the symmetric memory of +that destination PE, before any such following put operations to that same destination PE +are written to the symmetric memory of that destination PE. +Note that the ordering is provided separately on the sequences of puts from the calling PE to +each distinct destination PE. The \fBshmem_quiet()\fP +routine should be used instead if ordering +of puts is required when multiple destination PEs are involved. +.SH NOTES + +The shmem_quiet function should be called if ordering of puts is desired when multiple remote +PEs are involved.
+.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_float_g.3in b/oshmem/shmem/man/man3/shmem_float_g.3in new file mode 100644 index 0000000000..d2bbc4ad8d --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_g.3in @@ -0,0 +1 @@ +.so man3/shmem_char_g.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_float_get.3in b/oshmem/shmem/man/man3/shmem_float_get.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_get.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_float_iget.3in b/oshmem/shmem/man/man3/shmem_float_iget.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_iget.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_float_iput.3in b/oshmem/shmem/man/man3/shmem_float_iput.3in new file mode 100644 index 0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_iput.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_float_max_to_all.3in b/oshmem/shmem/man/man3/shmem_float_max_to_all.3in new file mode 100644 index 0000000000..e4ad3901e8 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_max_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_max_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_float_min_to_all.3in b/oshmem/shmem/man/man3/shmem_float_min_to_all.3in new file mode 100644 index 0000000000..d688221529 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_min_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_min_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_float_p.3in b/oshmem/shmem/man/man3/shmem_float_p.3in new file mode 100644 index 0000000000..c08d60a543 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_p.3in @@ -0,0 +1 @@ +.so man3/shmem_char_p.3 \ No 
newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_float_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_float_prod_to_all.3in new file mode 100644 index 0000000000..39b196d082 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_prod_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_prod_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_float_put.3in b/oshmem/shmem/man/man3/shmem_float_put.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_put.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_float_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_float_sum_to_all.3in new file mode 100644 index 0000000000..f75a494841 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_sum_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_sum_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_float_swap.3in b/oshmem/shmem/man/man3/shmem_float_swap.3in new file mode 100644 index 0000000000..a038a3f31e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_float_swap.3in @@ -0,0 +1 @@ +.so man3/shmem_swap.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_get128.3in b/oshmem/shmem/man/man3/shmem_get128.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_get128.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_get32.3in b/oshmem/shmem/man/man3/shmem_get32.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_get32.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_get64.3in b/oshmem/shmem/man/man3/shmem_get64.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_get64.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git 
a/oshmem/shmem/man/man3/shmem_getmem.3in b/oshmem/shmem/man/man3/shmem_getmem.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_getmem.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_iget128.3in b/oshmem/shmem/man/man3/shmem_iget128.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_iget128.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_iget32.3in b/oshmem/shmem/man/man3/shmem_iget32.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_iget32.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_iget64.3in b/oshmem/shmem/man/man3/shmem_iget64.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_iget64.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_add.3in b/oshmem/shmem/man/man3/shmem_int_add.3in new file mode 100644 index 0000000000..ff4728492a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_add.3in @@ -0,0 +1,76 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_ADD" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_int_add\fP(3), +\fIshmem_int4_add\fP(3), +\fIshmem_int8_add\fP(3), +\fIshmem_long_add\fP(3), +\fIshmem_longlong_add\fP(3) +\- Performs an atomic add +operation. 
+.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_int_add(int *target, int value, int pe); +void shmem_long_add(long *target, long value, int pe); +void shmem_longlong_add(long long *target, long long value, + int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pe + +CALL SHMEM_INT4_ADD(target, value, pe) +CALL SHMEM_INT8_ADD(target, value, pe) +.Ve +.SH DESCRIPTION + +The atomic add routines add \fBvalue\fP +to the data at address \fBtarget\fP +on PE +\fBpe\fP\&. +The operation completes without the possibility of another process updating +target between the time of the fetch and the update. +.PP +The arguments are as follows: +.TP +target +The remotely accessible integer data object to be updated on the remote PE. If +you are using C/C++, the type of target should match that implied in the SYNOPSIS section. If +you are using the Fortran compiler, it must be of type integer with an element size of 4 bytes +for SHMEM_INT4_ADD and 8 bytes for SHMEM_INT8_ADD. +.TP +value +The value to be atomically added to target. If you are using C/C++, the type of +value should match that implied in the SYNOPSIS section. If you are using Fortran, it must be +of type integer with an element size of target. +.TP +pe +An integer that indicates the PE number upon which target is to be updated. If you +are using Fortran, it must be a default integer value. +.PP +.SH NOTES + +The term remotely accessible is defined in \fIintro_shmem\fP(3)\&.
+.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_cache\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_int_and_to_all.3in b/oshmem/shmem/man/man3/shmem_int_and_to_all.3in new file mode 100644 index 0000000000..5b5103cd48 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_and_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_and_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_cswap.3in b/oshmem/shmem/man/man3/shmem_int_cswap.3in new file mode 100644 index 0000000000..568e66cc2a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_cswap.3in @@ -0,0 +1,127 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_CSWAP" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_int_cswap\fP(3), +\fIshmem_int4_cswap\fP(3), +\fIshmem_int8_cswap\fP(3), +\fIshmem_long_cswap\fP(3), +\fIshmem_longlong_cswap\fP(3) +\- Performs an atomic conditional swap to a remote data object +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +int shmem_int_cswap(int *target, int cond, int value, int pe); + +long shmem_long_cswap(long *target, long cond, long value, + int pe); + +long long shmem_longlong_cswap(long long *target, + long long cond, long long value, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pe + +INTEGER(KIND=4) SHMEM_INT4_CSWAP +ires = SHMEM_INT4_CSWAP(target, cond, value, pe) + +INTEGER(KIND=8) SHMEM_INT8_CSWAP +ires = SHMEM_INT8_CSWAP(target, cond, value, pe) +.Ve +.SH DESCRIPTION + +The conditional swap routines conditionally update a target data object on an arbitrary +processing element (PE) and return the prior contents of the data object in one atomic +operation. +.PP +The arguments are as follows: +.TP +target +The remotely accessible integer data object to be updated on the remote PE.
If +you are using C/C++, the type of target should match that implied in the SYNOPSIS section. If +you are using the Fortran compiler, it must be of type integer with an element size of 4 bytes +for SHMEM_INT4_CSWAP and 8 bytes for SHMEM_INT8_CSWAP. +.TP +value +The value to be atomically written to target. If you are using C/C++, the type of +value should match that implied in the SYNOPSIS section. If you are using Fortran, it must be +of type integer with an element size of target. +.TP +pe +An integer that indicates the PE number upon which target is to be updated. If you +are using Fortran, it must be a default integer value. +.TP +target +The remotely accessible integer data object to be updated on the remote PE. If +you are using C/C++, the data type of target should match that implied in the SYNOPSIS +section. If you are using Fortran, it must be of the following type: +.RS +.TP +\fBSHMEM_INT4_CSWAP\fP: 4\-byte integer +.TP +\fBSHMEM_INT8_CSWAP\fP: 8\-byte integer +.RE +.RS +.PP +.RE +.TP +cond +cond is compared to the remote target value. If cond and the remote target are +equal, then value is swapped into the remote target. Otherwise, the remote target is +unchanged. In either case, the old value of the remote target is returned as the function return +value. cond must be of the same data type as target. +.TP +value +The value to be atomically written to the remote PE. value must be the same data +type as target. +.TP +pe +An integer that indicates the PE number upon which target is to be updated. If you +are using Fortran, it must be a default integer value. +.PP +.SH NOTES + +The term remotely accessible is defined in \fIintro_shmem\fP(3)\&. +.SH RETURN VALUES + +The contents that had been in the target data object on the remote PE prior to the conditional +swap. +.SH EXAMPLES + +The following call ensures that the first PE to execute the conditional swap will successfully +write its PE number to race_winner on PE 0.
+.Vb +main() +{ + static int race_winner = \-1; + int oldval; + + start_pes(2); + oldval = shmem_int_cswap(&race_winner, \-1, _my_pe(), 0); + if (oldval == \-1) + printf("pe %d was first\\n",_my_pe()); +} +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_cache\fP(3), +\fIshmem_swap\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_int_fadd.3in b/oshmem/shmem/man/man3/shmem_int_fadd.3in new file mode 100644 index 0000000000..a1eb726924 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_fadd.3in @@ -0,0 +1,79 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_FADD" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_int4_fadd\fP(3), +\fIshmem_int8_fadd\fP(3), +\fIshmem_int_fadd\fP(3), +\fIshmem_long_fadd\fP(3), +\fIshmem_longlong_fadd\fP(3) +\- Performs an atomic fetch\-and\-add operation on a remote data object +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +int shmem_int_fadd(int *target, int value, int pe); + +long shmem_long_fadd(long *target, long value, int pe); + +long long shmem_longlong_fadd(long long *target, long long value, + int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pe + +INTEGER(KIND=4) SHMEM_INT4_FADD, ires, target, value +ires = SHMEM_INT4_FADD(target, value, pe) + +INTEGER(KIND=8) SHMEM_INT8_FADD, ires, target, value +ires = SHMEM_INT8_FADD(target, value, pe) +.Ve +.SH DESCRIPTION + +shmem_fadd functions perform an atomic fetch\-and\-add operation. An atomic +fetch\-and\-add operation fetches the old target and adds value to target without the +possibility of another process updating target between the time of the fetch and the update. +These routines add value to target on Processing Element (PE) pe and return the previous +contents of target as an atomic operation.
+.PP +The arguments are as follows: +.TP +target +The remotely accessible integer data object to be updated on the remote PE. The +type of target should match that implied in the SYNOPSIS section. +.TP +value +The value to be atomically added to target. The type of value should match that +implied in the SYNOPSIS section. +.TP +pe +An integer that indicates the PE number on which target is to be updated. If you are +using Fortran, it must be a default integer value. +.PP +.SH NOTES + +The term remotely accessible is defined in \fIintro_shmem\fP(3)\&. +.SH RETURN VALUES + +The contents that had been at the target address on the remote PE prior to the atomic addition +operation. +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_int_finc.3in b/oshmem/shmem/man/man3/shmem_int_finc.3in new file mode 100644 index 0000000000..0f1808693e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_finc.3in @@ -0,0 +1,76 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_FINC" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_int4_finc\fP(3), +\fIshmem_int8_finc\fP(3), +\fIshmem_int_finc\fP(3), +\fIshmem_long_finc\fP(3), +\fIshmem_longlong_finc\fP(3) +\- Performs an atomic fetch\-and\-increment operation on a remote data object +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +int shmem_int_finc(int *target, int pe); + +long shmem_long_finc(long *target, int pe); + +long long shmem_longlong_finc(long long *target, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pe +INTEGER(KIND=4) SHMEM_INT4_FINC, target4 +INTEGER(KIND=8) SHMEM_INT8_FINC, target8 + +ires4 = SHMEM_INT4_FINC(target4, pe) + +ires8 = SHMEM_INT8_FINC(target8, pe) +.Ve +.SH DESCRIPTION + +The fetch and increment routines retrieve the value at address \fBtarget\fP +on PE +\fBpe\fP, +and update \fBtarget\fP +with the result of incrementing the retrieved value by +one. The operation must be completed without the possibility of another process updating +\fBtarget\fP +between the time of the fetch and the update. +.PP +The arguments are as follows: +.TP +target +The remotely accessible integer data object to be updated on the remote PE. The +type of target should match that implied in the SYNOPSIS section. +.TP +pe +An integer that indicates the PE number upon which target is to be updated. If you +are using Fortran, it must be a default integer value. +.PP +.SH NOTES + +The term remotely accessible is defined in \fIintro_shmem\fP(3)\&. +.SH RETURN VALUES + +The contents that had been at the target address on the remote PE prior to the increment.
+.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_int_g.3in b/oshmem/shmem/man/man3/shmem_int_g.3in new file mode 100644 index 0000000000..d2bbc4ad8d --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_g.3in @@ -0,0 +1 @@ +.so man3/shmem_char_g.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_get.3in b/oshmem/shmem/man/man3/shmem_int_get.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_get.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_iget.3in b/oshmem/shmem/man/man3/shmem_int_iget.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_iget.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_inc.3in b/oshmem/shmem/man/man3/shmem_int_inc.3in new file mode 100644 index 0000000000..0a879d766d --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_inc.3in @@ -0,0 +1,73 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_INC" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_int4_inc\fP(3), +\fIshmem_int8_inc\fP(3), +\fIshmem_int_inc\fP(3), +\fIshmem_long_inc\fP(3), +\fIshmem_longlong_inc\fP(3) +\- These routines perform an atomic increment operation on a remote data object. 
+.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_int_inc(int *target, int pe); + +void shmem_long_inc(long *target, int pe); + +void shmem_longlong_inc(long long *target, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pe +INTEGER(KIND=4) target4 +INTEGER(KIND=8) target8 + +CALL SHMEM_INT4_INC(target4, pe) + +CALL SHMEM_INT8_INC(target8, pe) +.Ve +.SH DESCRIPTION + +The atomic increment routines replace the value of \fBtarget\fP +with its value incremented by +one. The operation must be completed without the possibility of another process updating +\fBtarget\fP +between the time of the fetch and the update. +.PP +The arguments are as follows: +.TP +target +The remotely accessible integer data object to be updated on the remote PE. The +type of target should match that implied in the SYNOPSIS section. +.TP +pe +An integer that indicates the PE number upon which target is to be updated. If you +are using Fortran, it must be a default integer value. +.PP +.SH NOTES + +The term remotely accessible is defined in \fIintro_shmem\fP(3)\&. +.SH RETURN VALUES + +None.
+.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_int_iput.3in b/oshmem/shmem/man/man3/shmem_int_iput.3in new file mode 100644 index 0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_iput.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_max_to_all.3in b/oshmem/shmem/man/man3/shmem_int_max_to_all.3in new file mode 100644 index 0000000000..e4ad3901e8 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_max_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_max_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_min_to_all.3in b/oshmem/shmem/man/man3/shmem_int_min_to_all.3in new file mode 100644 index 0000000000..d688221529 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_min_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_min_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_int_or_to_all.3in b/oshmem/shmem/man/man3/shmem_int_or_to_all.3in new file mode 100644 index 0000000000..c78fc06052 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_or_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_or_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_p.3in b/oshmem/shmem/man/man3/shmem_int_p.3in new file mode 100644 index 0000000000..c08d60a543 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_p.3in @@ -0,0 +1 @@ +.so man3/shmem_char_p.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_int_prod_to_all.3in new file mode 100644 index 0000000000..39b196d082 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_prod_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_prod_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_int_put.3in b/oshmem/shmem/man/man3/shmem_int_put.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_put.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline 
at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_int_sum_to_all.3in new file mode 100644 index 0000000000..f75a494841 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_sum_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_sum_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_int_swap.3in b/oshmem/shmem/man/man3/shmem_int_swap.3in new file mode 100644 index 0000000000..a038a3f31e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_swap.3in @@ -0,0 +1 @@ +.so man3/shmem_swap.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_int_wait.3in b/oshmem/shmem/man/man3/shmem_int_wait.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_wait.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmem_int_wait_until.3in b/oshmem/shmem/man/man3/shmem_int_wait_until.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_wait_until.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmem_int_xor_to_all.3in b/oshmem/shmem/man/man3/shmem_int_xor_to_all.3in new file mode 100644 index 0000000000..cd2d696b96 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_int_xor_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_xor_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_iput128.3in b/oshmem/shmem/man/man3/shmem_iput128.3in new file mode 100644 index 0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_iput128.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_iput32.3in b/oshmem/shmem/man/man3/shmem_iput32.3in new file mode 100644 index 0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_iput32.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_iput64.3in 
b/oshmem/shmem/man/man3/shmem_iput64.3in new file mode 100644 index 0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_iput64.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_add.3in b/oshmem/shmem/man/man3/shmem_long_add.3in new file mode 100644 index 0000000000..b356added7 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_add.3in @@ -0,0 +1 @@ +.so man3/shmem_int_add.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_and_to_all.3in b/oshmem/shmem/man/man3/shmem_long_and_to_all.3in new file mode 100644 index 0000000000..5b5103cd48 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_and_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_and_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_cswap.3in b/oshmem/shmem/man/man3/shmem_long_cswap.3in new file mode 100644 index 0000000000..b89ae230be --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_cswap.3in @@ -0,0 +1 @@ +.so man3/shmem_int_cswap.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_fadd.3in b/oshmem/shmem/man/man3/shmem_long_fadd.3in new file mode 100644 index 0000000000..bebae84923 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_fadd.3in @@ -0,0 +1 @@ +.so man3/shmem_int_fadd.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_finc.3in b/oshmem/shmem/man/man3/shmem_long_finc.3in new file mode 100644 index 0000000000..2c75c2ab68 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_finc.3in @@ -0,0 +1 @@ +.so man3/shmem_int_finc.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_g.3in b/oshmem/shmem/man/man3/shmem_long_g.3in new file mode 100644 index 0000000000..d2bbc4ad8d --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_g.3in @@ -0,0 +1 @@ +.so man3/shmem_char_g.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_get.3in 
b/oshmem/shmem/man/man3/shmem_long_get.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_get.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_iget.3in b/oshmem/shmem/man/man3/shmem_long_iget.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_iget.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_inc.3in b/oshmem/shmem/man/man3/shmem_long_inc.3in new file mode 100644 index 0000000000..aa2f265e1b --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_inc.3in @@ -0,0 +1 @@ +.so man3/shmem_int_inc.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_iput.3in b/oshmem/shmem/man/man3/shmem_long_iput.3in new file mode 100644 index 0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_iput.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_max_to_all.3in b/oshmem/shmem/man/man3/shmem_long_max_to_all.3in new file mode 100644 index 0000000000..e4ad3901e8 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_max_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_max_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_min_to_all.3in b/oshmem/shmem/man/man3/shmem_long_min_to_all.3in new file mode 100644 index 0000000000..d688221529 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_min_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_min_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_long_or_to_all.3in b/oshmem/shmem/man/man3/shmem_long_or_to_all.3in new file mode 100644 index 0000000000..c78fc06052 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_or_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_or_to_all.3 \ No newline at end of file diff --git 
a/oshmem/shmem/man/man3/shmem_long_p.3in b/oshmem/shmem/man/man3/shmem_long_p.3in new file mode 100644 index 0000000000..c08d60a543 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_p.3in @@ -0,0 +1 @@ +.so man3/shmem_char_p.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_long_prod_to_all.3in new file mode 100644 index 0000000000..39b196d082 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_prod_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_prod_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_long_put.3in b/oshmem/shmem/man/man3/shmem_long_put.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_put.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_long_sum_to_all.3in new file mode 100644 index 0000000000..f75a494841 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_sum_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_sum_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_long_swap.3in b/oshmem/shmem/man/man3/shmem_long_swap.3in new file mode 100644 index 0000000000..a038a3f31e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_swap.3in @@ -0,0 +1 @@ +.so man3/shmem_swap.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_long_wait.3in b/oshmem/shmem/man/man3/shmem_long_wait.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_wait.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmem_long_wait_until.3in b/oshmem/shmem/man/man3/shmem_long_wait_until.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_wait_until.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmem_long_xor_to_all.3in b/oshmem/shmem/man/man3/shmem_long_xor_to_all.3in new 
file mode 100644 index 0000000000..cd2d696b96 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_long_xor_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_xor_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longdouble_g.3in b/oshmem/shmem/man/man3/shmem_longdouble_g.3in new file mode 100644 index 0000000000..d2bbc4ad8d --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_g.3in @@ -0,0 +1 @@ +.so man3/shmem_char_g.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longdouble_get.3in b/oshmem/shmem/man/man3/shmem_longdouble_get.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_get.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longdouble_iget.3in b/oshmem/shmem/man/man3/shmem_longdouble_iget.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_iget.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longdouble_iput.3in b/oshmem/shmem/man/man3/shmem_longdouble_iput.3in new file mode 100644 index 0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_iput.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longdouble_max_to_all.3in b/oshmem/shmem/man/man3/shmem_longdouble_max_to_all.3in new file mode 100644 index 0000000000..e4ad3901e8 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_max_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_max_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longdouble_min_to_all.3in b/oshmem/shmem/man/man3/shmem_longdouble_min_to_all.3in new file mode 100644 index 0000000000..d688221529 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_min_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_min_to_all.3 diff 
--git a/oshmem/shmem/man/man3/shmem_longdouble_p.3in b/oshmem/shmem/man/man3/shmem_longdouble_p.3in new file mode 100644 index 0000000000..c08d60a543 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_p.3in @@ -0,0 +1 @@ +.so man3/shmem_char_p.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longdouble_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_longdouble_prod_to_all.3in new file mode 100644 index 0000000000..39b196d082 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_prod_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_prod_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_longdouble_put.3in b/oshmem/shmem/man/man3/shmem_longdouble_put.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_put.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longdouble_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_longdouble_sum_to_all.3in new file mode 100644 index 0000000000..f75a494841 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longdouble_sum_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_sum_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_longlong_add.3in b/oshmem/shmem/man/man3/shmem_longlong_add.3in new file mode 100644 index 0000000000..b356added7 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_add.3in @@ -0,0 +1 @@ +.so man3/shmem_int_add.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_and_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_and_to_all.3in new file mode 100644 index 0000000000..5b5103cd48 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_and_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_and_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_cswap.3in b/oshmem/shmem/man/man3/shmem_longlong_cswap.3in new file mode 100644 index 0000000000..b89ae230be --- /dev/null +++ 
b/oshmem/shmem/man/man3/shmem_longlong_cswap.3in @@ -0,0 +1 @@ +.so man3/shmem_int_cswap.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_fadd.3in b/oshmem/shmem/man/man3/shmem_longlong_fadd.3in new file mode 100644 index 0000000000..bebae84923 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_fadd.3in @@ -0,0 +1 @@ +.so man3/shmem_int_fadd.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_finc.3in b/oshmem/shmem/man/man3/shmem_longlong_finc.3in new file mode 100644 index 0000000000..2c75c2ab68 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_finc.3in @@ -0,0 +1 @@ +.so man3/shmem_int_finc.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_g.3in b/oshmem/shmem/man/man3/shmem_longlong_g.3in new file mode 100644 index 0000000000..d2bbc4ad8d --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_g.3in @@ -0,0 +1 @@ +.so man3/shmem_char_g.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_get.3in b/oshmem/shmem/man/man3/shmem_longlong_get.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_get.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_iget.3in b/oshmem/shmem/man/man3/shmem_longlong_iget.3in new file mode 100644 index 0000000000..48dee9db50 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_iget.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iget.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_inc.3in b/oshmem/shmem/man/man3/shmem_longlong_inc.3in new file mode 100644 index 0000000000..aa2f265e1b --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_inc.3in @@ -0,0 +1 @@ +.so man3/shmem_int_inc.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_iput.3in b/oshmem/shmem/man/man3/shmem_longlong_iput.3in new file mode 100644 index 
0000000000..c7b4a30e1a --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_iput.3in @@ -0,0 +1 @@ +.so man3/shmem_short_iput.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_max_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_max_to_all.3in new file mode 100644 index 0000000000..e4ad3901e8 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_max_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_max_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_min_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_min_to_all.3in new file mode 100644 index 0000000000..d688221529 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_min_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_min_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_longlong_or_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_or_to_all.3in new file mode 100644 index 0000000000..c78fc06052 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_or_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_or_to_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_p.3in b/oshmem/shmem/man/man3/shmem_longlong_p.3in new file mode 100644 index 0000000000..c08d60a543 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_p.3in @@ -0,0 +1 @@ +.so man3/shmem_char_p.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_prod_to_all.3in new file mode 100644 index 0000000000..39b196d082 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_prod_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_prod_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_longlong_put.3in b/oshmem/shmem/man/man3/shmem_longlong_put.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_put.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git 
a/oshmem/shmem/man/man3/shmem_longlong_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_sum_to_all.3in new file mode 100644 index 0000000000..f75a494841 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_sum_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_sum_to_all.3 diff --git a/oshmem/shmem/man/man3/shmem_longlong_swap.3in b/oshmem/shmem/man/man3/shmem_longlong_swap.3in new file mode 100644 index 0000000000..a038a3f31e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_swap.3in @@ -0,0 +1 @@ +.so man3/shmem_swap.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_longlong_wait.3in b/oshmem/shmem/man/man3/shmem_longlong_wait.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_wait.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmem_longlong_wait_until.3in b/oshmem/shmem/man/man3/shmem_longlong_wait_until.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_wait_until.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmem_longlong_xor_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_xor_to_all.3in new file mode 100644 index 0000000000..cd2d696b96 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_longlong_xor_to_all.3in @@ -0,0 +1 @@ +.so man3/shmem_short_xor_all.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_pe_accessible.3in b/oshmem/shmem/man/man3/shmem_pe_accessible.3in new file mode 100644 index 0000000000..66b38b9224 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_pe_accessible.3in @@ -0,0 +1,54 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_PE\\_ACCESSIBLE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +shmem_pe_accessible \- Determines whether a processing element (PE) is accessible via +SHMEM data transfer operations. +.SH SYNOPSIS + +C: +.Vb +#include <mpp/shmem.h> + +int shmem_pe_accessible(int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +LOGICAL LOG, SHMEM_PE_ACCESSIBLE +INTEGER pe + +LOG = SHMEM_PE_ACCESSIBLE(pe) +.Ve +.SH DESCRIPTION + +shmem_pe_accessible returns a value that indicates whether the calling PE is able to perform +OpenSHMEM communication operations with the remote PE. +.SH RETURN VALUES + +.TP +C/C++ +The return value is 1 if the specified PE is a valid remote PE for SHMEM functions; +otherwise, it is 0. +.TP +Fortran +The return value is \&.TRUE. if the specified PE is a valid remote PE for SHMEM +functions; otherwise, it is \&.FALSE.. +.PP +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_addr_accessible\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_ptr.3in b/oshmem/shmem/man/man3/shmem_ptr.3in new file mode 100644 index 0000000000..bfc1a0170c --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_ptr.3in @@ -0,0 +1,129 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_PTR" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_ptr\fP(3) +\- Returns a pointer to a data object on a specified processing element +(PE). +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void *shmem_ptr(void *target, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +POINTER (PTR, POINTEE) +INTEGER pe + +PTR = SHMEM_PTR(target, pe) +.Ve +.SH DESCRIPTION + +The shmem_ptr routine returns an address that can be used to directly reference +\fBtarget\fP +on the remote PE \fBpe\fP\&. +With this address we can perform ordinary loads +and stores to the remote address. 
+.PP +When a sequence of loads (gets) and stores (puts) to a data object on a remote PE does not +match the access pattern provided in a SHMEM data transfer routine like +\fIshmem_put32\fP(3) +or \fIshmem_real_iget\fP(3), +the shmem_ptr function can +provide an efficient means to accomplish the communication. +.PP +The arguments are as follows: +.TP +target +The symmetric data object to be referenced. +.TP +pe +An integer that indicates the PE number on which target is to be accessed. If you +are using Fortran, it must be a default integer value. +.PP +.SH EXAMPLES + +This Fortran program calls shmem_ptr and then PE 0 writes to the BIGD array on PE 1: +.Vb +PROGRAM REMOTEWRITE + INCLUDE 'mpp/shmem.fh' + + INTEGER BIGD(100) + SAVE BIGD + INTEGER POINTEE(*) + + POINTER (PTR,POINTEE) + CALL START_PES(0) + IF (MY_PE() .EQ. 0) THEN + ! initialize PE 1's BIGD array + PTR = SHMEM_PTR(BIGD, 1) ! get address of PE 1's BIGD + ! array + DO I=1,100 + POINTEE(I) = I + ENDDO + ENDIF + CALL SHMEM_BARRIER_ALL + IF (MY_PE() .EQ. 1) THEN + PRINT *, 'BIGD on PE 1 is: ' + PRINT *, BIGD + ENDIF +END +.Ve +This is the equivalent program written in C: +.Vb +#include <mpp/shmem.h> +main() +{ + static int bigd[100]; + int *ptr; + int i; + + start_pes(0); + if (_my_pe() == 0) { + /* initialize PE 1's bigd array */ + ptr = shmem_ptr(bigd, 1); + for (i=0; i<100; i++) + *ptr++ = i+1; + } + shmem_barrier_all(); + if (_my_pe() == 1) { + printf("bigd on PE 1 is:\\n"); + for (i=0; i<100; i++) + printf(" %d\\n",bigd[i]); + printf("\\n"); + } +} +.Ve +.SH NOTES + +The shmem_ptr function is available only on systems where ordinary memory loads and +stores are used to implement SHMEM put and get operations. +.PP +.SH RETURN VALUES + +shmem_ptr returns a pointer to the data object on the specified remote PE. If target is not +remotely accessible, a NULL pointer is returned. 
+.PP +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_put\fP(3), +\fIshmem_get\fP(3) +.PP diff --git a/oshmem/shmem/man/man3/shmem_put128.3in b/oshmem/shmem/man/man3/shmem_put128.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_put128.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_put32.3in b/oshmem/shmem/man/man3/shmem_put32.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_put32.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_put64.3in b/oshmem/shmem/man/man3/shmem_put64.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_put64.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_putmem.3in b/oshmem/shmem/man/man3/shmem_putmem.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_putmem.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_quiet.3in b/oshmem/shmem/man/man3/shmem_quiet.3in new file mode 100644 index 0000000000..cd92ae1794 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_quiet.3in @@ -0,0 +1,84 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_QUIET" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_quiet\fP(3) +\- Waits for completion of all outstanding remote writes issued by a +processing element (PE). +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_quiet(void); +.Ve +Fortran: +.Vb +CALL SHMEM_QUIET +.Ve +.SH DESCRIPTION + +shmem_quiet ensures ordering of put (remote write) operations. 
All put operations issued to +any processing element (PE) prior to the call to shmem_quiet are guaranteed to be visible to +all other PEs no later than any subsequent memory load or store, remote put or get, or +synchronization operations that follow the call to shmem_quiet. +.SH NOTES + +shmem_quiet is most useful as a way of ensuring ordering of delivery of several put +operations. For example, you might use shmem_quiet to await delivery of a block of data +before issuing another put, which sets a completion flag on another PE. +.br +shmem_quiet is not usually needed if \fIshmem_barrier_all\fP(3) +or +\fIshmem_barrier\fP(3) +are called. The barrier routines all wait for the completion of +outstanding remote writes (puts). +.SH EXAMPLES + +.Vb +PROGRAM COMPFLAG + INCLUDE "mpp/shmem.fh" + + INTEGER FLAG_VAR, ARRAY(100), RECEIVER, SENDER + COMMON/FLAG/FLAG_VAR + COMMON/DATA/ARRAY + INTRINSIC MY_PE + + FLAG_VAR = 0 + CALL SHMEM_BARRIER_ALL ! wait for FLAG_VAR to be initialized + SENDER = 0 ! PE 0 sends the data + RECEIVER = 1 ! PE 1 receives the data + + IF (MY_PE() .EQ. 0) THEN + ARRAY = 33 + CALL SHMEM_PUT(ARRAY, ARRAY, 100, RECEIVER) ! start sending data + CALL SHMEM_QUIET ! wait for delivery + CALL SHMEM_PUT(FLAG_VAR, 1, 1, RECEIVER) ! send completion flag + ELSE IF (MY_PE() .EQ. RECEIVER) THEN + CALL SHMEM_UDCFLUSH + CALL SHMEM_WAIT(FLAG_VAR, 0) + PRINT *,ARRAY ! 
ARRAY has been delivered + ENDIF +END +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_barrier\fP(3), +\fIshmem_barrier_all\fP(3), +\fIshmem_fence\fP(3), +\fIshmem_put\fP(3), +\fIshmem_wait\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_set_cache_inv.3in b/oshmem/shmem/man/man3/shmem_set_cache_inv.3in new file mode 100644 index 0000000000..4a6a361ef9 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_set_cache_inv.3in @@ -0,0 +1 @@ +.so man3/shmem_udcflush.3 diff --git a/oshmem/shmem/man/man3/shmem_set_cache_line_inv.3in b/oshmem/shmem/man/man3/shmem_set_cache_line_inv.3in new file mode 100644 index 0000000000..4a6a361ef9 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_set_cache_line_inv.3in @@ -0,0 +1 @@ +.so man3/shmem_udcflush.3 diff --git a/oshmem/shmem/man/man3/shmem_set_lock.3in b/oshmem/shmem/man/man3/shmem_set_lock.3in new file mode 100644 index 0000000000..750e83a21b --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_set_lock.3in @@ -0,0 +1,78 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_LOCK" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_set_lock\fP(3), +\fIshmem_clear_lock\fP(3), +\fIshmem_test_lock\fP(3) +\- Releases, locks, and tests a mutual exclusion memory lock. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_clear_lock(long *lock); + +void shmem_set_lock(long *lock); + +int shmem_test_lock(long *lock); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER lock, SHMEM_TEST_LOCK + +CALL SHMEM_CLEAR_LOCK(lock) + +CALL SHMEM_SET_LOCK(lock) + +I = SHMEM_TEST_LOCK(lock) +.Ve +.SH DESCRIPTION + +The shmem_set_lock routine sets a mutual exclusion lock after waiting for the lock to be +freed by any other PE currently holding the lock. Waiting PEs are assured of getting the lock +in a first\-come, first\-served manner. 
+.PP +The shmem_clear_lock routine releases a lock previously set by shmem_set_lock after +ensuring that all local and remote stores initiated in the critical region are complete. +.PP +The shmem_test_lock function sets a mutual exclusion lock only if it is currently cleared. +By using this function, a PE can avoid blocking on a set lock. If the lock is currently set, the +function returns without waiting. +These routines are appropriate for protecting a critical region from simultaneous update by +multiple PEs. +They accept the following arguments: +.TP +lock +A symmetric data object that is a scalar variable or an array of length 1. This +data object must be set to 0 on all processing elements (PEs) prior to the first use. lock must +be of type integer. If you are using Fortran, it must be of default kind. +.PP +.SH NOTES + +The term symmetric data object is defined in \fIintro_shmem\fP(3)\&. +.PP +.SH RETURN VALUES + +The shmem_test_lock function returns 0 if the lock was originally cleared and this call +was able to set the lock. A value of 1 is returned if the lock had been set and the call returned +without waiting to set the lock. +.PP +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_and_to_all.3in b/oshmem/shmem/man/man3/shmem_short_and_to_all.3in new file mode 100644 index 0000000000..d9204fcfe2 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_and_to_all.3in @@ -0,0 +1,206 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_AND" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_int_and_to_all\fP(3), +\fIshmem_int4_and_to_all\fP(3), +\fIshmem_int8_and_to_all\fP(3), +\fIshmem_long_and_to_all\fP(3), +\fIshmem_longlong_and_to_all\fP(3), +\fIshmem_short_and_to_all\fP(3) +\- Performs a bitwise AND operation on symmetric +arrays over the active set of PEs. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_int_and_to_all(int *target, int *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + int *pWrk, long *pSync); + +void shmem_long_and_to_all(long *target, long *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + long *pWrk, long *pSync); + +void shmem_longlong_and_to_all(long long *target, + long long *source, int nreduce, int PE_start, int logPE_stride, + int PE_size, long long *pWrk, long *pSync); + +void shmem_short_and_to_all(short *target, short *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + short *pWrk, long *pSync); + +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE) +INTEGER nreduce, PE_start, logPE_stride, PE_size + +CALL SHMEM_INT4_AND_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT8_AND_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) +.Ve +.PP +.SH DESCRIPTION + +The shared memory (SHMEM) reduction routines compute one or more reductions across +symmetric arrays on multiple virtual PEs. A reduction performs an associative binary +operation across a set of values. For a list of other SHMEM reduction routines, see +\fIintro_shmem\fP(3)\&. +.PP +The nreduce argument determines the number of separate reductions to perform. The source +array on all PEs in the active set provides one element for each reduction. The results of the +reductions are placed in the target array on all PEs in the active set. 
The active set is defined +by the PE_start, logPE_stride, PE_size triplet. +.PP +The source and target arrays may be the same array, but they may not be overlapping arrays. +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The arguments are as follows: +.TP +target +A symmetric array, of length nreduce elements, to receive the result of the +reduction operations. The data type of target varies with the version of the reduction routine +being called. When calling from C/C++, refer to the SYNOPSIS section for data type +information. When calling from Fortran, the target data types are as follows: +.RS +.TP +\fBshmem_int8_and_to_all\fP: Integer, with an element size of 8 bytes +.TP +\fBshmem_int4_and_to_all\fP: Integer, with an element size of 4 bytes +.RE +.RS +.PP +.RE +.TP +source +A symmetric array, of length nreduce elements, that contains one element for +each separate reduction operation. The source argument must have the same data type as +target. +.TP +nreduce +The number of elements in the target and source arrays. nreduce must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.TP +pWrk +A symmetric work array. The pWrk argument must have the same data type as +target. 
In C/C++, this contains max(nreduce/2 + 1, +_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains +max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. +.TP +pSync +A symmetric work array. In C/C++, pSync must be of type long and size +_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync must be of type integer and size +SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value. +Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in +C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter +the reduction routine. +.PP +The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all +PEs in the active set. The same target and source arrays, and the same pWrk and pSync work +arrays, must be passed to all PEs in the active set. +.PP +Before any PE calls a reduction routine, you must ensure that the following conditions exist +(synchronization via a barrier or some other method is often needed to ensure this): The +pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a +collective SHMEM routine. The target array on all PEs in the active set is ready to accept the +results of the reduction. +.PP +Upon return from a reduction routine, the following are true for the local PE: The target array +is updated. The values in the pSync array are restored to the original values. +.PP +.SH NOTES + +The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&. +All SHMEM reduction routines reset the values in pSync before they return, so a particular +pSync buffer need only be initialized the first time it is used. +.PP +You must ensure that the pSync array is not being updated on any PE in the active set while +any of the PEs participate in processing of a SHMEM reduction routine. 
Be careful to avoid the +following situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized pSync +before any of them enter a SHMEM routine called with the pSync synchronization array. A +pSync or pWrk array can be reused in a subsequent reduction routine call only if none of the +PEs in the active set are still processing a prior reduction routine call that used the same +pSync or pWrk arrays. In general, this can be assured only by doing some type of +synchronization. However, in the special case of reduction routines being called with the +same active set, you can allocate two pSync and pWrk arrays and alternate between them on +successive calls. +.PP +.SH EXAMPLES + +\fBExample 1\fP: +This Fortran example statically initializes the pSync array and finds the logical AND of the integer variable FOO across all even PEs. +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) +DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ +PARAMETER (NR=1) +REAL PWRK(MAX(NR/2+1, SHMEM_REDUCE_MIN_WRKDATA_SIZE)) +INTEGER FOO, FOOAND +COMMON /COM/ FOO, FOOAND, PWRK +INTRINSIC MY_PE + +IF ( MOD(MY_PE(),2) .EQ. 0) THEN + CALL SHMEM_INT8_AND_TO_ALL(FOOAND, FOO, NR, 0, 1, N$PES/2, + & PWRK, PSYNC) + PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOAND +ENDIF +.Ve +\fBExample 2\fP: +Consider the following C call: +.Vb +shmem_int_and_to_all( target, source, 3, 0, 0, 8, pwrk, psync ); +.Ve +The preceding call is more efficient, but semantically equivalent to, the combination of the +following calls: +.Vb +shmem_int_and_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8, + pwrk1, psync1); + +shmem_int_and_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8, + pwrk2, psync2); + +shmem_int_and_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8, + pwrk1, psync1); +.Ve +Note that two sets of pWrk and pSync arrays are used alternately because no +synchronization is done between calls. 
+.SH SEE ALSO + +\fIf90\fP(1), +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_g.3in b/oshmem/shmem/man/man3/shmem_short_g.3in new file mode 100644 index 0000000000..d2bbc4ad8d --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_g.3in @@ -0,0 +1 @@ +.so man3/shmem_char_g.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_short_get.3in b/oshmem/shmem/man/man3/shmem_short_get.3in new file mode 100644 index 0000000000..6d7c165d2e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_get.3in @@ -0,0 +1 @@ +.so man3/shmem_char_get.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_short_iget.3in b/oshmem/shmem/man/man3/shmem_short_iget.3in new file mode 100644 index 0000000000..5538cd707b --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_iget.3in @@ -0,0 +1,217 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_IGET" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_complex_iget\fP(3), +\fIshmem_double_iget\fP(3), +\fIshmem_float_iget\fP(3), +\fIshmem_iget4\fP(3), +\fIshmem_iget8\fP(3), +\fIshmem_iget32\fP(3), +\fIshmem_iget64\fP(3), +\fIshmem_iget128\fP(3), +\fIshmem_int_iget\fP(3), +\fIshmem_integer_iget\fP(3), +\fIshmem_logical_iget\fP(3), +\fIshmem_long_iget\fP(3), +\fIshmem_longdouble_iget\fP(3), +\fIshmem_longlong_iget\fP(3), +\fIshmem_real_iget\fP(3), +\fIshmem_short_iget\fP(3) +\- Transfers strided data from a specified processing element (PE) +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> +void shmem_iget32(void *target, const void *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_iget64(void *target, const void *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_iget128(void *target, const void *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_int_iget(int *target, const int *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_double_iget(double *target, const double *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_float_iget(float *target, const float *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_long_iget(long *target, const long *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_longdouble_iget(long double *target, + const long double *source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_longlong_iget(long long *target, + const long long *source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_short_iget(short *target, + const short *source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER tst, sst, len, pe + +CALL SHMEM_COMPLEX_IGET(target, source, tst, sst, len, +& pe) + +CALL SHMEM_DOUBLE_IGET(target, source, tst, sst, len, +& pe) + +CALL
SHMEM_IGET4(target, source, tst, sst, len, pe) + +CALL SHMEM_IGET8(target, source, tst, sst, len, pe) + +CALL SHMEM_IGET32(target, source, tst, sst, len, pe) + +CALL SHMEM_IGET64(target, source, tst, sst, len, pe) + +CALL SHMEM_IGET128(target, source, tst, sst, len, pe) + +CALL SHMEM_INTEGER_IGET(target, source, tst, sst, len, +& pe) + +CALL SHMEM_LOGICAL_IGET(target, source, tst, sst, len, +& pe) + +CALL SHMEM_REAL_IGET(target, source, tst, sst, len, pe) +.Ve +.SH DESCRIPTION + +The strided get routines retrieve array data available at address source on remote PE (pe). +The elements of the \fBsource\fP +array are separated by a stride \fBsst\fP\&. +Once the data is received, +it is stored at the local memory address \fBtarget\fP, +separated by stride \fBtst\fP\&. +The routines return +when the data has been copied into the local \fBtarget\fP +array. +.PP +The arguments are as follows: +.TP +target +Array to be updated on the local PE. +.TP +source +Array containing the data to be copied on the remote PE. +.TP +tst +The stride between consecutive elements of the target array. The stride is scaled by +the element size of the target array. A value of 1 indicates contiguous data. tst must be of +type integer. If you are calling from Fortran, it must be a default integer value. +.TP +sst +The stride between consecutive elements of the source array. The stride is scaled +by the element size of the source array. A value of 1 indicates contiguous data. sst must be +of type integer. If you are calling from Fortran, it must be a default integer value. +.TP +len +Number of elements in the target and source arrays. len must be of type integer. If +you are using Fortran, it must be a constant, variable, or array element of default integer +type. +.TP +pe +PE number of the remote PE. pe must be of type integer. If you are using Fortran, it +must be a constant, variable, or array element of default integer type. 
+.PP +The target and source data objects must conform to typing constraints, which are as +follows: +.TP +\fBshmem_iget32, shmem_iget4\fP: Any noncharacter type that has a storage size +equal to 32 bits. +.TP +\fBshmem_iget64, shmem_iget8\fP: Any noncharacter type that has a storage size +equal to 64 bits. +.TP +\fBshmem_iget128\fP: Any noncharacter type that has a storage size equal to +128 bits. +.TP +\fBshmem_short_iget\fP: Elements of type short. +.TP +\fBshmem_int_iget\fP: Elements of type int. +.TP +\fBshmem_long_iget\fP: Elements of type long. +.TP +\fBshmem_longlong_iget\fP: Elements of type long long. +.TP +\fBshmem_float_iget\fP: Elements of type float. +.TP +\fBshmem_double_iget\fP: Elements of type double. +.TP +\fBshmem_longdouble_iget\fP: Elements of type long double. +.TP +\fBSHMEM_COMPLEX_IGET\fP: Elements of type complex of default size. +.TP +\fBSHMEM_DOUBLE_IGET\fP: (Fortran) Elements of type double precision. +.TP +\fBSHMEM_INTEGER_IGET\fP: Elements of type integer. +.TP +\fBSHMEM_LOGICAL_IGET\fP: Elements of type logical. +.TP +\fBSHMEM_REAL_IGET\fP: Elements of type real. +.PP +If you are using Fortran, data types must be of default size. For example, a real variable must +be declared as REAL, REAL*4, or REAL(KIND=4). +.PP +.SH NOTES + +See \fIintro_shmem\fP(3) +for a definition of the term remotely accessible. +.PP +.SH EXAMPLES + +The following simple example uses shmem_logical_iget in a Fortran program. Compile +this example with the \-lsma compiler option. +.Vb +PROGRAM STRIDELOGICAL + LOGICAL SOURCE(10), TARGET(5) + SAVE SOURCE !
SAVE MAKES IT REMOTELY ACCESSIBLE + DATA SOURCE /.T.,.F.,.T.,.F.,.T.,.F.,.T.,.F.,.T.,.F./ + DATA TARGET / 5*.F. / + + CALL START_PES(2) + IF (MY_PE() .EQ. 0) THEN + CALL SHMEM_LOGICAL_IGET(TARGET, SOURCE, 1, 2, 5, 1) + PRINT*,'TARGET AFTER SHMEM_LOGICAL_IGET:',TARGET + ENDIF + CALL SHMEM_BARRIER_ALL +END +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_get\fP(3), +\fIshmem_quiet\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_iput.3in b/oshmem/shmem/man/man3/shmem_short_iput.3in new file mode 100644 index 0000000000..6582c084ed --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_iput.3in @@ -0,0 +1,220 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_IPUT" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_complex_iput\fP(3), +\fIshmem_double_iput\fP(3), +\fIshmem_float_iput\fP(3), +\fIshmem_int_iput\fP(3), +\fIshmem_integer_iput\fP(3), +\fIshmem_iput4\fP(3), +\fIshmem_iput8\fP(3), +\fIshmem_iput32\fP(3), +\fIshmem_iput64\fP(3), +\fIshmem_iput128\fP(3), +\fIshmem_logical_iput\fP(3), +\fIshmem_long_iput\fP(3), +\fIshmem_longdouble_iput\fP(3), +\fIshmem_longlong_iput\fP(3), +\fIshmem_real_iput\fP(3), +\fIshmem_short_iput\fP(3) +\- Transfer strided data to a specified processing element (PE). 
+.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_double_iput(double *target, const double *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_float_iput(float *target, const float *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_int_iput(int *target, const int *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_iput32(void *target, const void *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_iput64(void *target, const void *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_iput128(void *target, const void *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_long_iput(long *target, const long *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); + +void shmem_longdouble_iput(long double *target, + const long double *source, ptrdiff_t tst, ptrdiff_t sst, + size_t len, int pe); + +void shmem_longlong_iput(long long *target, + const long long *source, ptrdiff_t tst, ptrdiff_t sst, + size_t len, int pe); + +void shmem_short_iput(short *target, const short *source, + ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER tst, sst, len, pe + +CALL SHMEM_COMPLEX_IPUT(target, source, tst, sst, len, +& pe) + +CALL SHMEM_DOUBLE_IPUT(target, source, tst, sst, len, +& pe) + +CALL SHMEM_INTEGER_IPUT(target, source, tst, sst, len, +& pe) + +CALL SHMEM_IPUT4(target, source, tst, sst, len, pe) + +CALL SHMEM_IPUT8(target, source, tst, sst, len, pe) + +CALL SHMEM_IPUT32(target, source, tst, sst, len, pe) + +CALL SHMEM_IPUT64(target, source, tst, sst, len, pe) + +CALL SHMEM_IPUT128(target, source, tst, sst, len, pe) + +CALL SHMEM_LOGICAL_IPUT(target, source, tst, sst, len, +& pe) + +CALL SHMEM_REAL_IPUT(target, source, tst, sst, len, pe) +.Ve +.PP +.SH DESCRIPTION + +The shmem_iput routines read the elements of a local array (\fBsource\fP) +and write them +to a remote array
(\fBtarget\fP) +on the PE indicated by \fBpe\fP\&. +These routines return +when the data has been copied out of the source array on the local PE but not necessarily +before the data has been delivered to the remote data object. +.PP +The arguments are as follows: +.TP +target +Array to be updated on the remote PE. This data object must be remotely +accessible. +.TP +source +Array containing the data to be copied. +.TP +tst +The stride between consecutive elements of the target array. The stride is scaled by +the element size of the target array. A value of 1 indicates contiguous data. tst must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +sst +The stride between consecutive elements of the source array. The stride is scaled +by the element size of the source array. A value of 1 indicates contiguous data. sst must be +of type integer. If you are using Fortran, it must be a default integer value. +.TP +len +Number of elements in the target and source arrays. len must be of type integer. If +you are using Fortran, it must be a constant, variable, or array element of default integer +type. +.TP +pe +PE number of the remote PE. pe must be of type integer. If you are using Fortran, it +must be a constant, variable, or array element of default integer type. +.PP +The target and source data objects must conform to typing constraints, which are as follows: +.PP +.TP +\fBshmem_iput32, shmem_iput4\fP: Any noncharacter type that has a storage size equal +to 32 bits. +.TP +\fBshmem_iput64, shmem_iput8\fP: Any noncharacter type that has a storage size equal +to 64 bits. +.TP +\fBshmem_iput128\fP: Any noncharacter type that has a storage size equal to 128 bits. +.TP +\fBshmem_short_iput\fP: Elements of type short. +.TP +\fBshmem_int_iput\fP: Elements of type int. +.TP +\fBshmem_long_iput\fP: Elements of type long. +.TP +\fBshmem_longlong_iput\fP: Elements of type long long. +.TP +\fBshmem_float_iput\fP: Elements of type float. 
+.TP +\fBshmem_double_iput\fP: Elements of type double. +.TP +\fBshmem_longdouble_iput\fP: Elements of type long double. +.TP +\fBSHMEM_COMPLEX_IPUT\fP: Elements of type complex of default size. +.TP +\fBSHMEM_DOUBLE_IPUT\fP: (Fortran) Elements of type double precision. +.TP +\fBSHMEM_INTEGER_IPUT\fP: Elements of type integer. +.TP +\fBSHMEM_LOGICAL_IPUT\fP: Elements of type logical. +.TP +\fBSHMEM_REAL_IPUT\fP: Elements of type real. +.PP +If you are using Fortran, data types must be of default size. For example, a real variable must +be declared as REAL, REAL*4 or REAL(KIND=4). +.PP +.SH NOTES + +See \fIintro_shmem\fP(3) +for a definition of the term remotely accessible. +.PP +.SH EXAMPLES + +Consider the following simple shmem_short_iput example for C/C++ programs. +.Vb +#include <mpp/shmem.h> + +main() +{ + short source[10] = { 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10 }; + static short target[10]; + + start_pes(2); + if (_my_pe() == 0) { + /* put 5 elements into target on PE 1 */ + shmem_short_iput(target, source, 1, 2, 5, 1); + } + shmem_barrier_all(); /* sync sender and receiver */ + if (_my_pe() == 1) { + shmem_udcflush(); /* not required on IRIX systems */ + printf("target on PE %d is %d %d %d %d %d\en", _my_pe(), + (int)target[0], (int)target[1], (int)target[2], + (int)target[3], (int)target[4] ); + } + shmem_barrier_all(); /* sync before exiting */ +} +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_iget\fP(3), +\fIshmem_put\fP(3), +\fIshmem_quiet\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_max_to_all.3in b/oshmem/shmem/man/man3/shmem_short_max_to_all.3in new file mode 100644 index 0000000000..760dd45eec --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_max_to_all.3in @@ -0,0 +1,238 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc.
+.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_MAX" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_double_max_to_all\fP(3), +\fIshmem_float_max_to_all\fP(3), +\fIshmem_int_max_to_all\fP(3), +\fIshmem_int4_max_to_all\fP(3), +\fIshmem_int8_max_to_all\fP(3), +\fIshmem_long_max_to_all\fP(3), +\fIshmem_longdouble_max_to_all\fP(3), +\fIshmem_longlong_max_to_all\fP(3), +\fIshmem_real4_max_to_all\fP(3), +\fIshmem_real8_max_to_all\fP(3), +\fIshmem_real16_max_to_all\fP(3), +\fIshmem_short_max_to_all\fP(3) +\- Performs a maximum function reduction across a set of processing elements (PEs). +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_double_max_to_all(double *target, double *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + double *pWrk, long *pSync); + +void shmem_float_max_to_all(float *target, float *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + float *pWrk, long *pSync); + +void shmem_int_max_to_all(int *target, int *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + int *pWrk, long *pSync); + +void shmem_long_max_to_all(long *target, long *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + long *pWrk, long *pSync); + +void shmem_longdouble_max_to_all(long double *target, + long double *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, long double *pWrk, long *pSync); + +void shmem_longlong_max_to_all(long long *target, + long long *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, long long *pWrk, long *pSync); + +void shmem_short_max_to_all(short *target, short *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + short *pWrk, long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE) + +INTEGER nreduce, PE_start, logPE_stride, PE_size + +CALL SHMEM_INT4_MAX_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride,
PE_size, pWrk, pSync) + +CALL SHMEM_INT8_MAX_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL4_MAX_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL8_MAX_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL16_MAX_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) +.Ve +.SH DESCRIPTION + +The shared memory (SHMEM) reduction routines compute one or more reductions across +symmetric arrays on multiple virtual PEs. A reduction performs an associative binary +operation across a set of values. For a list of other SHMEM reduction routines, see +\fIintro_shmem\fP(3)\&. +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The nreduce argument determines the number of separate reductions to perform. The source +array on all PEs in the active set provides one element for each reduction. The results of the +reductions are placed in the target array on all PEs in the active set. The active set is defined +by the PE_start, logPE_stride, PE_size triplet. +.PP +The source and target arrays may be the same array, but they may not be overlapping arrays. +.PP +The arguments are as follows: +.TP +target +A symmetric array of length nreduce elements to receive the results of the +reduction operations. The data type of target varies with the version of the reduction routine +being called. When calling from C, refer to the SYNOPSIS section for data type information. +.PP +When calling from Fortran, the target data types are as follows: +.RS +.TP +\fBshmem_real8_max_to_all\fP: Real, with an element size of 8 bytes. +.TP +\fBshmem_int4_max_to_all\fP: Integer, with an element size of 4 bytes.
+.TP +\fBshmem_int8_max_to_all\fP: Integer, with an element size of 8 bytes. +.TP +\fBshmem_real4_max_to_all\fP: Real, with an element size of 4 bytes. +.TP +\fBshmem_real16_max_to_all\fP: Real, with an element size of 16 bytes. +.RE +.RS +.PP +.RE +.TP +source +A symmetric array of length nreduce elements that contains one element for +each separate reduction operation. The source argument must have the same data type as +target. +.TP +nreduce +The number of elements in the target and source arrays. nreduce must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.TP +pWrk +A symmetric work array. The pWrk argument must have the same data type as +target. In C/C++, this contains max(nreduce/2 + 1, +_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains +max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. +.TP +pSync +A symmetric work array. In C/C++, pSync is of type long and size +_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size +SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value. +Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in +C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter +the reduction routine. +.PP +The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all +PEs in the active set. 
The same target and source arrays, and the same pWrk and pSync work +arrays, must be passed to all PEs in the active set. +.PP +Before any PE calls a reduction routine, you must ensure that the following conditions exist +(synchronization via a barrier or some other method is often needed to ensure this): The +pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a +collective SHMEM routine. The target array on all PEs in the active set is ready to accept the +results of the reduction. +.PP +Upon return from a reduction routine, the following are true for the local PE: The target array +is updated. The values in the pSync array are restored to the original values. +.PP +.SH NOTES + +The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&. +All SHMEM reduction routines reset the values in pSync before they return, so a particular +pSync buffer need only be initialized the first time it is used. +.PP +You must ensure that the pSync array is not being updated on any PE in the active set while +any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the +following situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized pSync +before any of them enter a SHMEM routine called with the pSync synchronization array. A +pSync or pWrk array can be reused in a subsequent reduction routine call only if none +of the PEs in the active set are still processing a prior reduction routine call that used the +same pSync or pWrk arrays. +.PP +In general, this can be assured only by doing some type of synchronization. However, in the +special case of reduction routines being called with the same active set, you can allocate two +pSync and pWrk arrays and alternate between them on successive calls. 
+.PP +.SH EXAMPLES + +\fBExample 1:\fP +This Fortran example statically initializes the pSync array and finds the +maximum value of real variable FOO across all even PEs. +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) +DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ +PARAMETER (NR=1) +REAL FOO, FOOMAX, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) +COMMON /COM/ FOO, FOOMAX, PWRK +INTRINSIC MY_PE + +IF ( MOD(MY_PE(),2) .EQ. 0) THEN + CALL SHMEM_REAL8_MAX_TO_ALL(FOOMAX, FOO, NR, 0, 1, N$PES/2, + & PWRK, PSYNC) + PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOMAX +ENDIF +.Ve +\fBExample 2:\fP +Consider the following C/C++ call: +.Vb +shmem_int_max_to_all( target, source, 3, 0, 0, 8, pwrk, psync ); +.Ve +The preceding call is more efficient, but semantically equivalent to, the combination of the +following calls: +.Vb +shmem_int_max_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8, + pwrk1, psync1); +shmem_int_max_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8, + pwrk2, psync2); +shmem_int_max_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8, + pwrk1, psync1); +.Ve +Note that two sets of pWrk and pSync arrays are used alternately because no synchronization +is done between calls. +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_min_to_all.3in b/oshmem/shmem/man/man3/shmem_short_min_to_all.3in new file mode 100644 index 0000000000..8bdaae4e9b --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_min_to_all.3in @@ -0,0 +1,234 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_MIN" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_double_min_to_all\fP(3), +\fIshmem_float_min_to_all\fP(3), +\fIshmem_int_min_to_all\fP(3), +\fIshmem_int4_min_to_all\fP(3), +\fIshmem_int8_min_to_all\fP(3), +\fIshmem_long_min_to_all\fP(3), +\fIshmem_longdouble_min_to_all\fP(3), +\fIshmem_longlong_min_to_all\fP(3), +\fIshmem_real4_min_to_all\fP(3), +\fIshmem_real8_min_to_all\fP(3), +\fIshmem_real16_min_to_all\fP(3), +\fIshmem_short_min_to_all\fP(3) +\- Performs a minimum function reduction across a set of processing elements (PEs) +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_double_min_to_all(double *target, double *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + double *pWrk, long *pSync); + +void shmem_float_min_to_all(float *target, float *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + float *pWrk, long *pSync); + +void shmem_int_min_to_all(int *target, int *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + int *pWrk, long *pSync); + +void shmem_long_min_to_all(long *target, long *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + long *pWrk, long *pSync); + +void shmem_longdouble_min_to_all(long double *target, + long double *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, long double *pWrk, + long *pSync); + +void shmem_longlong_min_to_all(long long *target, + long long *source, int nreduce, int PE_start, int logPE_stride, + int PE_size, long long *pWrk, long *pSync); + +void shmem_short_min_to_all(short *target, short *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + short *pWrk, long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE) +INTEGER nreduce, PE_start, logPE_stride, PE_size + +CALL SHMEM_INT4_MIN_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT8_MIN_TO_ALL(target, source, nreduce,
PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL4_MIN_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL8_MIN_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL16_MIN_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) +.Ve +.SH DESCRIPTION + +The shared memory (SHMEM) reduction routines compute one or more reductions across +symmetric arrays on multiple virtual PEs. A reduction performs an associative binary +operation across a set of values. For a list of other SHMEM reduction routines, see +\fIintro_shmem\fP(3)\&. +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The nreduce argument determines the number of separate reductions to perform. The source +array on all PEs in the active set provides one element for each reduction. The results of the +reductions are placed in the target array on all PEs in the active set. The active set is defined +by the PE_start, logPE_stride, PE_size triplet. +.PP +The source and target arrays may be the same array, but they may not be overlapping arrays. +.PP +The arguments are as follows: +.TP +target +A symmetric array of length nreduce elements to receive the results of the +reduction operations. The data type of target varies with the version of the reduction routine +being called. When calling from C/C++, refer to the SYNOPSIS section for data type +information. 
When calling from Fortran, the target data types are as follows: +.RS +.TP +\fBshmem_int4_min_to_all\fP: Integer, with an element size of 4 bytes +.TP +\fBshmem_int8_min_to_all\fP: Integer, with an element size of 8 bytes +.TP +\fBshmem_real4_min_to_all\fP: Real, with an element size of 4 bytes +.TP +\fBshmem_real8_min_to_all\fP: Real, with an element size of 8 bytes +.TP +\fBshmem_real16_min_to_all\fP: Real, with an element size of 16 bytes +.RE +.RS +.PP +.RE +.TP +source +A symmetric array, of length nreduce elements, that contains one element for +each separate reduction operation. The source argument must have the same data type as target. +.TP +nreduce +The number of elements in the target and source arrays. nreduce must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.TP +pWrk +A symmetric work array. The pWrk argument must have the same data type as +target. In C/C++, this contains max(nreduce/2 + 1, +_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains +max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. +.TP +pSync +A symmetric work array. In C/C++, pSync is of type long and size +_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size +SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value.
+Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in +C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter +the reduction routine. +.PP +The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all +PEs in the active set. The same target and source arrays, and the same pWrk and pSync work +arrays, must be passed to all PEs in the active set. +.PP +Before any PE calls a reduction routine, you must ensure that the following conditions exist +(synchronization via a barrier or some other method is often needed to ensure this): The +pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a +collective SHMEM routine. The target array on all PEs in the active set is ready to accept the +results of the reduction. +.PP +Upon return from a reduction routine, the following are true for the local PE: The target array +is updated. The values in the pSync array are restored to the original values. +.PP +.SH NOTES + +The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&. +All SHMEM reduction routines reset the values in pSync before they return, so a particular +pSync buffer need only be initialized the first time it is used. +.PP +You must ensure that the pSync array is not being updated on any PE in the active set while +any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the +following situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized pSync +before any of them enter a SHMEM routine called with the pSync synchronization array. A +pSync or pWrk array can be reused in a subsequent reduction routine call only if none +of the PEs in the active set are still processing a prior reduction routine call that used the +same pSync or pWrk arrays. 
In general, this can be assured only by doing some type of +synchronization. However, in the special case of reduction routines being called with the +same active set, you can allocate two pSync and pWrk arrays and alternate between them on +successive calls. +.PP +.SH EXAMPLES + +\fBExample 1:\fP +This Fortran example statically initializes the pSync array and finds the +minimum value of real variable FOO across all the even PEs. +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) +DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ +PARAMETER (NR=1) +REAL FOO, FOOMIN, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) +COMMON /COM/ FOO, FOOMIN, PWRK +INTRINSIC MY_PE + +IF ( MOD(MY_PE(),2) .EQ. 0) THEN + CALL SHMEM_REAL8_MIN_TO_ALL(FOOMIN, FOO, NR, 0, 1, N$PES/2, + & PWRK, PSYNC) + PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOMIN +ENDIF +.Ve +\fBExample 2:\fP +Consider the following C/C++ call: +.Vb +shmem_int_min_to_all( target, source, 3, 0, 0, 8, pwrk, psync ); +.Ve +The preceding call is more efficient, but semantically equivalent to, the combination of the +following calls: +.Vb +shmem_int_min_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8, + pwrk1, psync1); +shmem_int_min_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8, + pwrk2, psync2); +shmem_int_min_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8, + pwrk1, psync1); +.Ve +Note that two sets of pWrk and pSync arrays are used alternately because no synchronization +is done between calls. +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_or_to_all.3in b/oshmem/shmem/man/man3/shmem_short_or_to_all.3in new file mode 100644 index 0000000000..77ebef6889 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_or_to_all.3in @@ -0,0 +1,202 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_OR" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_int_or_to_all\fP(3), +\fIshmem_int4_or_to_all\fP(3), +\fIshmem_int8_or_to_all\fP(3), +\fIshmem_long_or_to_all\fP(3), +\fIshmem_longlong_or_to_all\fP(3), +\fIshmem_short_or_to_all\fP(3) +\- Performs a bitwise OR function reduction across a set of processing elements (PEs) +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_int_or_to_all(int *target, int *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + int *pWrk, long *pSync); + +void shmem_long_or_to_all(long *target, long *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + long *pWrk, long *pSync); + +void shmem_longlong_or_to_all(long long *target, + long long *source, int nreduce, int PE_start, int logPE_stride, + int PE_size, long long *pWrk, long *pSync); + +void shmem_short_or_to_all(short *target, short *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + short *pWrk, long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE) +INTEGER nreduce, PE_start, logPE_stride, PE_size + +CALL SHMEM_INT4_OR_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT8_OR_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) +.Ve +.SH DESCRIPTION + +The shared memory (SHMEM) reduction routines compute one or more reductions across +symmetric arrays on multiple virtual PEs. A reduction performs an associative binary +operation across a set of values. For a list of other SHMEM reduction routines, see +intro_shmem(3). +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The nreduce argument determines the number of separate reductions to perform.
The source +array on all PEs in the active set provides one element for each reduction. The results of the +reductions are placed in the target array on all PEs in the active set. The active set is defined +by the PE_start, logPE_stride, PE_size triplet. +.PP +The source and target arrays may be the same array, but they may not be overlapping arrays. +.PP +The arguments are as follows: +.TP +target +A symmetric array of length nreduce elements to receive the results of the +reduction operations. The data type of target varies with the version of the reduction routine +being called. When calling from C/C++, refer to the SYNOPSIS section for data type +information. When calling from Fortran, the target data types are as follows: +.RS +.TP +\fBshmem_int8_or_to_all\fP Integer, with an element size of 8 bytes. +.TP +\fBshmem_int4_or_to_all\fP Integer, with an element size of 4 bytes. +.RE +.RS +.PP +.RE +.TP +source +A symmetric array, of length nreduce elements, that contains one element for +each separate reduction operation. The source argument must have the same data type as +target. +.TP +nreduce +The number of elements in the target and source arrays. nreduce must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.TP +pWrk +A symmetric work array. The pWrk argument must have the same data type as +target. In C/C++, this contains max(nreduce/2 + 1, +_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. 
In Fortran, this contains +max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. +.TP +pSync +A symmetric work array. In C/C++, pSync is of type long and size +_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size +SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value. +Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in +C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter +the reduction routine. +.PP +The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on +all PEs in the active set. The same target and source arrays, and the same pWrk and pSync +work arrays, must be passed to all PEs in the active set. +.PP +Before any PE calls a reduction routine, you must ensure that the following conditions exist +(synchronization via a barrier or some other method is often needed to ensure this): The +pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a +collective SHMEM routine. The target array on all PEs in the active set is ready to accept the +results of the reduction. +.PP +Upon return from a reduction routine, the following are true: The target array is updated. The +values in the pSync array are restored to the original values. +.PP +.SH NOTES + +The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&. +All SHMEM reduction routines reset the values in pSync before they return, so a particular +pSync buffer need only be initialized the first time it is used. +.PP +You must ensure that the pSync array is not being updated on any PE in the active set while +any of the PEs participate in processing of a SHMEM reduction routine. 
Be careful to avoid +these situations: If the pSync array is initialized at run time, some type of synchronization is +needed to ensure that all PEs in the working set have initialized pSync before any of them +enter a SHMEM routine called with the pSync synchronization array. A pSync or pWrk array +can be reused in a subsequent reduction routine call only if none of the PEs in the active set +are still processing a prior reduction routine call that used the same pSync or pWrk arrays. In +general, this can be assured only by doing some type of synchronization. However, in the +special case of reduction routines being called with the same active set, you can allocate two +pSync and pWrk arrays and alternate between them on successive calls. +.PP +.SH EXAMPLES + +\fBExample 1:\fP +This Fortran example statically initializes the pSync array and finds the +bitwise OR of the integer variable FOO across all even PEs. +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) +DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ +PARAMETER (NR=1) +REAL PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) +INTEGER FOO, FOOOR +COMMON /COM/ FOO, FOOOR, PWRK +INTRINSIC MY_PE + +IF ( MOD(MY_PE(),2) .EQ. 0) THEN + CALL SHMEM_INT8_OR_TO_ALL(FOOOR, FOO, NR, 0, 1, N$PES/2, + & PWRK, PSYNC) + PRINT *,'Result on PE ',MY_PE(),' is ',FOOOR +ENDIF +.Ve +\fBExample 2:\fP +Consider the following C/C++ call: +.Vb +shmem_int_or_to_all( target, source, 3, 0, 0, 8, pwrk, psync ); +.Ve +The preceding call is more efficient, but semantically equivalent to, the combination of the +following calls: +.Vb +shmem_int_or_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8, + pwrk1, psync1); +shmem_int_or_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8, + pwrk2, psync2); +shmem_int_or_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8, + pwrk1, psync1); +.Ve +Note that two sets of pWrk and pSync arrays are used alternately because no synchronization +is done between calls.
+.PP +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_p.3in b/oshmem/shmem/man/man3/shmem_short_p.3in new file mode 100644 index 0000000000..c08d60a543 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_p.3in @@ -0,0 +1 @@ +.so man3/shmem_char_p.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_short_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_short_prod_to_all.3in new file mode 100644 index 0000000000..b7544a9af8 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_prod_to_all.3in @@ -0,0 +1,259 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_PROD" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_comp4_prod_to_all\fP(3), +\fIshmem_comp8_prod_to_all\fP(3), +\fIshmem_complexd_prod_to_all\fP(3), +\fIshmem_complexf_prod_to_all\fP(3), +\fIshmem_double_prod_to_all\fP(3), +\fIshmem_float_prod_to_all\fP(3), +\fIshmem_int_prod_to_all\fP(3), +\fIshmem_int4_prod_to_all\fP(3), +\fIshmem_int8_prod_to_all\fP(3), +\fIshmem_long_prod_to_all\fP(3), +\fIshmem_longdouble_prod_to_all\fP(3), +\fIshmem_longlong_prod_to_all\fP(3), +\fIshmem_real8_prod_to_all\fP(3), +\fIshmem_real16_prod_to_all\fP(3), +\fIshmem_real4_prod_to_all\fP(3), +\fIshmem_short_prod_to_all\fP(3) +\- Performs +a product reduction across a set of processing elements (PEs) +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_complexd_prod_to_all(double complex *target, + double complex *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, double complex *pWrk, + long *pSync); + +void shmem_complexf_prod_to_all(float complex *target, + float complex *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, float complex *pWrk, + long *pSync); + +void shmem_double_prod_to_all(double *target, double *source, + int nreduce, int
PE_start, int logPE_stride, int PE_size, + double *pWrk, long *pSync); + +void shmem_float_prod_to_all(float *target, float *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + float *pWrk, long *pSync); + +void shmem_int_prod_to_all(int *target, int *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + int *pWrk, long *pSync); + +void shmem_long_prod_to_all(long *target, long *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + long *pWrk, long *pSync); + +void shmem_longdouble_prod_to_all(long double *target, + long double *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, long double *pWrk, + long *pSync); + +void shmem_longlong_prod_to_all(long long *target, + long long *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, long long *pWrk, + long *pSync); + +void shmem_short_prod_to_all(short *target, short *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + short *pWrk, long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE) +INTEGER nreduce, PE_start, logPE_stride, PE_size + +CALL SHMEM_COMP4_PROD_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_COMP8_PROD_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT4_PROD_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT8_PROD_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL4_PROD_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL8_PROD_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL16_PROD_TO_ALL(target, source, nreduce, PE_start, +& logPE_stride, PE_size, pWrk, pSync) +.Ve +.SH DESCRIPTION + +The shared memory (SHMEM) reduction routines compute one or more reductions across 
+symmetric arrays on multiple virtual PEs. A reduction performs an associative binary +operation across a set of values. For a list of other SHMEM reduction routines, see +\fIintro_shmem\fP(3)\&. +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The nreduce argument determines the number of separate reductions to perform. The source +array on all PEs in the active set provides one element for each reduction. The results of the +reductions are placed in the target array on all PEs in the active set. The active set is defined +by the PE_start, logPE_stride, PE_size triplet. +.PP +The source and target arrays may be the same array, but they may not be overlapping arrays. +.PP +The arguments are as follows: +.TP +target +A symmetric array of length nreduce elements to receive the results of the +reduction operations. The data type of target varies with the version of the reduction routine +being called and the language used. When calling from C/C++, refer to the SYNOPSIS section +for data type information. When calling from Fortran, the target data types are as follows: +.RS +.TP +\fBshmem_comp4_prod_to_all\fP: Complex, with an element size equal to two +4\-byte real values. +.TP +\fBshmem_comp8_prod_to_all\fP: Complex, with an element size equal to two +8\-byte real values. +.TP +\fBshmem_int4_prod_to_all\fP: Integer, with an element size of 4 bytes +.TP +\fBshmem_int8_prod_to_all\fP: Integer, with an element size of 8 bytes +.TP +\fBshmem_real4_prod_to_all\fP: Real, with an element size of 4 bytes +.TP +\fBshmem_real8_prod_to_all\fP: Real, with an element size of 8 bytes +.TP +\fBshmem_real16_prod_to_all\fP: Real, with an element size of 16 bytes +.RE +.RS +.PP +.RE +.TP +source +A symmetric array, of length nreduce elements, that contains one element for +each separate reduction operation. 
The source argument must have the same data type as +target. +.TP +nreduce +The number of elements in the target and source arrays. nreduce must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.TP +pWrk +A symmetric work array. The pWrk argument must have the same data type as +target. In C/C++, this contains max(nreduce/2 + 1, +_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains +max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. +.TP +pSync +A symmetric work array. In C/C++, pSync is of type long and size +_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size +SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value. +Before any of the PEs in the active set enter the reduction routine, every element of this array +must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++) or +SHMEM_SYNC_VALUE (in Fortran). +.PP +The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all +PEs in the active set. The same target and source arrays, and the same pWrk and pSync work +arrays, must be passed to all PEs in the active set. 
Before any PE calls a reduction routine, you +must ensure that the following conditions exist (synchronization via a barrier or some +other method is often needed to ensure this): The pWrk and pSync arrays on all PEs in the +active set are not still in use from a prior call to a collective SHMEM routine. The target array +on all PEs in the active set is ready to accept the results of the reduction. +.PP +Upon return from a reduction routine, the following are true for the local PE: The target array +is updated. The values in the pSync array are restored to the original values. +.SH NOTES + +The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&. +All SHMEM reduction routines reset the values in pSync before they return, so a particular +pSync buffer need only be initialized the first time it is used. +.PP +You must ensure that the pSync array is not being updated on any PE in the active set while +any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the +following situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized pSync +before any of them enter a SHMEM routine called with the pSync synchronization array. A +pSync or pWrk array can be reused in a subsequent reduction routine call only if none of the +PEs in the active set are still processing a prior reduction routine call that used the same +pSync or pWrk arrays. In general, this can be assured only by doing some type of +synchronization. However, in the special case of reduction routines being called with the +same active set, you can allocate two pSync and pWrk arrays and alternate between them on +successive calls. +.SH EXAMPLES + +\fBExample 1:\fP +This Fortran example statically initializes the pSync array and finds the +product of the real variable FOO across all the even PEs. 
+.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) +DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ +PARAMETER (NR=1) +REAL FOO, FOOPROD, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) +COMMON /COM/ FOO, FOOPROD, PWRK +INTRINSIC MY_PE + +IF ( MOD(MY_PE(),2) .EQ. 0) THEN + CALL SHMEM_COMP8_PROD_TO_ALL(FOOPROD, FOO, NR, 0, 1, N$PES/2, + & PWRK, PSYNC) + PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOPROD +ENDIF +.Ve +\fBExample 2:\fP +Consider the following C/C++ call: +.Vb +shmem_short_prod_to_all(target, source, 3, 0, 0, 8, pwrk, psync); +.Ve +The preceding call is more efficient, but semantically equivalent to, the combination of the +following calls: +.Vb +shmem_short_prod_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8, + pwrk1, psync1); +shmem_short_prod_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8, + pwrk2, psync2); +shmem_short_prod_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8, + pwrk1, psync1); +.Ve +Note that two sets of pWrk and pSync arrays are used alternately because no synchronization +is done between calls. +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_put.3in b/oshmem/shmem/man/man3/shmem_short_put.3in new file mode 100644 index 0000000000..e3ca73d483 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_put.3in @@ -0,0 +1 @@ +.so man3/shmem_char_put.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_short_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_short_sum_to_all.3in new file mode 100644 index 0000000000..3467a882fe --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_sum_to_all.3in @@ -0,0 +1,281 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_SUM" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_comp4_sum_to_all\fP(3), +\fIshmem_comp8_sum_to_all\fP(3), +\fIshmem_complexd_sum_to_all\fP(3), +\fIshmem_complexf_sum_to_all\fP(3), +\fIshmem_double_sum_to_all\fP(3), +\fIshmem_float_sum_to_all\fP(3), +\fIshmem_int_sum_to_all\fP(3), +\fIshmem_int4_sum_to_all\fP(3), +\fIshmem_int8_sum_to_all\fP(3), +\fIshmem_long_sum_to_all\fP(3), +\fIshmem_longdouble_sum_to_all\fP(3), +\fIshmem_longlong_sum_to_all\fP(3), +\fIshmem_real4_sum_to_all\fP(3), +\fIshmem_real8_sum_to_all\fP(3), +\fIshmem_real16_sum_to_all\fP(3), +\fIshmem_short_sum_to_all\fP(3) +\- Performs +a sum reduction across a set of processing elements (PEs) +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_complexd_sum_to_all(double complex *target, + double complex *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, double complex *pWrk, + long *pSync); + +void shmem_complexf_sum_to_all(float complex *target, + float complex *source, int nreduce, int PE_start, + int logPE_stride, int PE_size, float complex *pWrk, + long *pSync); + +void shmem_double_sum_to_all(double *target, + double *source, int nreduce, int PE_start, int logPE_stride, + int PE_size, double *pWrk, long *pSync); + +void shmem_float_sum_to_all(float *target, float *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + float *pWrk, long *pSync); + +void shmem_int_sum_to_all(int *target, int *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + int *pWrk, long *pSync); + +void shmem_long_sum_to_all(long *target, long *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + long *pWrk, long *pSync); + +void shmem_longdouble_sum_to_all(long double *target, + long double *source, int nreduce, int PE_start, int + logPE_stride, int PE_size, long double *pWrk, long *pSync); + +void shmem_longlong_sum_to_all(long long *target, + long long *source, int nreduce, int PE_start, + int
logPE_stride, int PE_size, long long *pWrk, + long *pSync); + +void shmem_short_sum_to_all(short *target, short *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + short *pWrk, long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE) +INTEGER nreduce, PE_start, logPE_stride, PE_size + +CALL SHMEM_COMP4_SUM_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_COMP8_SUM_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT4_SUM_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT8_SUM_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL4_SUM_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL8_SUM_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_REAL16_SUM_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) +.Ve +.SH DESCRIPTION + +The shared memory (SHMEM) reduction routines compute one or more reductions across +symmetric arrays on multiple virtual PEs. A reduction performs an associative binary +operation across a set of values. For a list of other SHMEM reduction routines, see +\fIintro_shmem\fP(3)\&. +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine. If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The nreduce argument determines the number of separate reductions to perform. The source +array on all PEs in the active set provides one element for each reduction. The results of the +reductions are placed in the target array on all PEs in the active set. The active set is defined +by the PE_start, logPE_stride, PE_size triplet. 
+.PP +The source and target arrays may be the same array, but they may not be overlapping arrays. +.PP +The arguments are as follows: +.TP +target +The remotely accessible integer data object to be updated on the remote PE. If +you are using C/C++, the type of target should match that implied in the SYNOPSIS section. +If you are using the Fortran compiler, it must be of type integer with an element size of 4 +bytes for SHMEM_INT4_ADD and 8 bytes for SHMEM_INT8_ADD. +.TP +value +The value to be atomically added to target. If you are using C/C++, the type of +value should match that implied in the SYNOPSIS section. If you are using Fortran, it must be +of type integer with an element size of target. +.TP +pe +An integer that indicates the PE number upon which target is to be updated. If you +are using Fortran, it must be a default integer value. +.TP +target +A symmetric array of length nreduce elements to receive the results of the +reduction operations. +.br +The data type of target varies with the version of the reduction routine being called and the +language used. When calling from C/C++, refer to the SYNOPSIS section for data type +information. When calling from Fortran, the target data types are as follows: +.RS +.TP +\fBshmem_comp4_sum_to_all:\fP COMPLEX(KIND=4). +.TP +\fBshmem_comp8_sum_to_all:\fP Complex. If you are using Fortran, it must be +a default complex value. +.TP +\fBshmem_int4_sum_to_all:\fP INTEGER(KIND=4). +.TP +\fBshmem_int8_sum_to_all:\fP Integer. If you are using Fortran, it must be a +default integer value. +.TP +\fBshmem_real4_sum_to_all:\fP REAL(KIND=4). +.TP +\fBshmem_real8_sum_to_all:\fP Real. If you are using Fortran, it must be a +default real value. +.TP +\fBshmem_real16_sum_to_all:\fP Real. If you are using Fortran, it must be a +default real value. +.RE +.RS +.PP +.RE +.TP +source +A symmetric array, of length nreduce elements, that contains one element for +each separate reduction operation. 
The source argument must have the same data type as +target. +.TP +nreduce +The number of elements in the target and source arrays. nreduce must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.TP +pWrk +A symmetric work array. The pWrk argument must have the same data type as +target. In C/C++, this contains max(nreduce/2 + 1, +_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains +max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. +.TP +pSync +A symmetric work array. In C/C++, pSync is of type long and size +_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size +SHMEM_REDUCE_SYNC_SIZE. It must be a default integer value. Every element of this array +must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++) or +SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter the reduction +routine. +.PP +The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all +PEs in the active set. The same target and source arrays, and the same pWrk and pSync work +arrays, must be passed to all PEs in the active set. +.PP +Before any PE calls a reduction routine, you must ensure that the following conditions exist +(synchronization via a barrier or some other method is often needed to ensure this): The +pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a +collective SHMEM routine. 
The target array on all PEs in the active set is ready to accept the +results of the reduction. +.PP +Upon return from a reduction routine, the following are true for the local PE: The target array +is updated. The values in the pSync array are restored to the original values. +.SH NOTES + +The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&. +.PP +All SHMEM reduction routines reset the values in pSync before they return, so a particular +pSync buffer need only be initialized the first time it is used. +.PP +You must ensure that the pSync array is not being updated on any PE in the active set while +any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the +following situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized pSync +before any of them enter a SHMEM routine called with the pSync synchronization array. A +pSync or pWrk array can be reused in a subsequent reduction routine call only if none +of the PEs in the active set are still processing a prior reduction routine call that used the +same pSync or pWrk arrays. In general, this can be assured only by doing some +type of synchronization. However, in the special case of reduction routines being called with +the same active set, you can allocate two pSync and pWrk arrays and alternate between them +on successive calls. +.SH EXAMPLES + +\fBExample 1:\fP +This Fortran example statically initializes the pSync array and finds the +sum of the real variable FOO across all even PEs. +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) +DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ +PARAMETER (NR=1) +REAL FOO, FOOSUM, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) +COMMON /COM/ FOO, FOOSUM, PWRK +INTRINSIC MY_PE + +IF ( MOD(MY_PE(),2) .EQ. 
0) THEN + CALL SHMEM_INT4_SUM_TO_ALL(FOOSUM, FOO, NR, 0, 1, N$PES/2, + & PWRK, PSYNC) + PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOSUM +ENDIF +.Ve +\fBExample 2:\fP +Consider the following C/C++ call: +.Vb +shmem_int_sum_to_all( target, source, 3, 0, 0, 8, pwrk, psync ); +.Ve +The preceding call is more efficient, but semantically equivalent to, the combination of the +following calls: +.Vb +shmem_int_sum_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8, + pwrk1, psync1); +shmem_int_sum_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8, + pwrk2, psync2); +shmem_int_sum_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8, + pwrk1, psync1); +.Ve + +Note that two sets of pWrk and pSync arrays are used alternately because no +synchronization is done between calls. +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_short_wait.3in b/oshmem/shmem/man/man3/shmem_short_wait.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_wait.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmem_short_wait_until.3in b/oshmem/shmem/man/man3/shmem_short_wait_until.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_wait_until.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmem_short_xor_to_all.3in b/oshmem/shmem/man/man3/shmem_short_xor_to_all.3in new file mode 100644 index 0000000000..7d02702a87 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_short_xor_to_all.3in @@ -0,0 +1,215 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +..
+.TH "SHMEM\\_XOR" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_comp4_xor_to_all\fP(3), +\fIshmem_int_xor_to_all\fP(3), +\fIshmem_int4_xor_to_all\fP(3), +\fIshmem_int8_xor_to_all\fP(3), +\fIshmem_long_xor_to_all\fP(3), +\fIshmem_longlong_xor_to_all\fP(3), +\fIshmem_short_xor_to_all\fP(3) +\- Performs a bitwise XOR operation on symmetric +arrays over the active set of PEs. +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_int_xor_to_all(int *target, int *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + int *pWrk, long *pSync); + +void shmem_long_xor_to_all(long *target, long *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + long *pWrk, long *pSync); + +void shmem_longlong_xor_to_all(long long *target, + long long *source, int nreduce, int PE_start, int logPE_stride, + int PE_size, long long *pWrk, long *pSync); + +void shmem_short_xor_to_all(short *target, short *source, + int nreduce, int PE_start, int logPE_stride, int PE_size, + short *pWrk, long *pSync); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE) +INTEGER nreduce, PE_start, logPE_stride, PE_size + +CALL SHMEM_COMP4_XOR_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT4_XOR_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) + +CALL SHMEM_INT8_XOR_TO_ALL(target, source, nreduce, +& PE_start, logPE_stride, PE_size, pWrk, pSync) +.Ve +.SH DESCRIPTION + +The shared memory (SHMEM) reduction routines compute one or more reductions across +symmetric arrays on multiple virtual PEs. A reduction performs an associative binary +operation across a set of values. For a list of other SHMEM reduction routines, see +\fIintro_shmem\fP(3)\&. +.PP +As with all SHMEM collective routines, each of these routines assumes that only PEs in the +active set call the routine.
If a PE not in the active set calls a SHMEM collective routine, +undefined behavior results. +.PP +The nreduce argument determines the number of separate reductions to perform. The source +array on all PEs in the active set provides one element for each reduction. The results of the +reductions are placed in the target array on all PEs in the active set. The active set is defined +by the PE_start, logPE_stride, PE_size triplet. +.PP +The source and target arrays may be the same array, but they may not be overlapping arrays. +.PP +The arguments are as follows: +.TP +target +A symmetric array of length nreduce elements to receive the results of the +reduction operations. +The data type of target varies with the version of the reduction routine being called and the +language used. When calling from C/C++, refer to the SYNOPSIS section for data type +information. When calling from Fortran, the target data types are as follows: +.RS +.TP +\fBshmem_comp8_xor_to_all:\fP Complex, with an element size equal to two 8\- +byte real values +.TP +\fBshmem_comp4_xor_to_all:\fP Complex, with an element size equal to two 4\- +byte real values +.TP +\fBshmem_int8_xor_to_all:\fP Integer, with an element size of 8 bytes +.TP +\fBshmem_int4_xor_to_all:\fP Integer, with an element size of 4 bytes +.TP +\fBshmem_real8_xor_to_all:\fP Real, with an element size of 8 bytes +.TP +\fBshmem_real4_xor_to_all:\fP Real, with an element size of 4 bytes +.RE +.RS +.PP +.RE +.TP +source +A symmetric array, of length nreduce elements, that contains one element for +each separate reduction operation. The source argument must have the same data type as +target. +.TP +nreduce +The number of elements in the target and source arrays. nreduce must be of +type integer. If you are using Fortran, it must be a default integer value. +.TP +PE_start +The lowest virtual PE number of the active set of PEs. PE_start must be of +type integer. If you are using Fortran, it must be a default integer value. 
+.TP +logPE_stride +The log (base 2) of the stride between consecutive virtual PE numbers in +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a +default integer value. +.TP +PE_size +The number of PEs in the active set. PE_size must be of type integer. If you +are using Fortran, it must be a default integer value. +.TP +pWrk +A symmetric work array. The pWrk argument must have the same data type as +target. In C/C++, this contains max(nreduce/2 + 1, +_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains +max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. +.TP +pSync +A symmetric work array. In C/C++, pSync is of type long and size +_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size +SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value. +Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in +C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter +the reduction routine. +.PP +The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all +PEs in the active set. The same target and source arrays, and the same pWrk and pSync +work arrays, must be passed to all PEs in the active set. +.PP +Before any PE calls a reduction routine, you must ensure that the following conditions exist +(synchronization via a barrier or some other method is often needed to ensure this): The +pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a +collective SHMEM routine. The target array on all PEs in the active set is ready to accept the +results of the reduction. +.PP +Upon return from a reduction routine, the following are true for the local PE: The target array +is updated. The values in the pSync array are restored to the original values. +.SH NOTES + +The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&. 
+All SHMEM reduction routines reset the values in pSync before they return, so a particular +pSync buffer need only be initialized the first time it is used. +.PP +You must ensure that the pSync array is not being updated on any PE in the active set while +any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the +following situations: If the pSync array is initialized at run time, some type of +synchronization is needed to ensure that all PEs in the working set have initialized pSync +before any of them enter a SHMEM routine called with the pSync synchronization array. A +pSync or pWrk array can be reused in a subsequent reduction routine call only if none of the +PEs in the active set are still processing a prior reduction routine call that used the same +pSync or pWrk arrays. In general, this can be assured only by doing some type of +synchronization. However, in the special case of reduction routines being called with the +same active set, you can allocate two pSync and pWrk arrays and alternate between them on +successive calls. +.SH EXAMPLES + +\fBExample 1:\fP +This Fortran example statically initializes the pSync array and computes +the exclusive OR of variable FOO across all even PEs. +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) +DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ +PARAMETER (NR=1) +REAL FOO, FOOXOR, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) +COMMON /COM/ FOO, FOOXOR, PWRK +INTRINSIC MY_PE + +IF ( MOD(MY_PE(),2) .EQ. 
0) THEN + CALL SHMEM_REAL8_XOR_TO_ALL(FOOXOR, FOO, NR, 0, 1, N$PES/2, + & PWRK, PSYNC) + PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOXOR +ENDIF +.Ve +\fBExample 2:\fP +Consider the following C/C++ call: +.Vb +shmem_short_xor_to_all( target, source, 3, 0, 0, 8, pwrk, psync ); +.Ve +The preceding call is more efficient, but semantically equivalent to, the combination of the +following calls: +.Vb +shmem_short_xor_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8, + pwrk1, psync1); +shmem_short_xor_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8, + pwrk2, psync2); +shmem_short_xor_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8, + pwrk1, psync1); +.Ve +Note that two sets of pWrk and pSync arrays are used alternately because no synchronization +is done between calls. +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_swap.3in b/oshmem/shmem/man/man3/shmem_swap.3in new file mode 100644 index 0000000000..bc8daafe0e --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_swap.3in @@ -0,0 +1,115 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_SWAP" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_double_swap\fP(3), +\fIshmem_float_swap\fP(3), +\fIshmem_int_swap\fP(3), +\fIshmem_long_swap\fP(3), +\fIshmem_swap\fP(3), +\fIshmem_int4_swap\fP(3), +\fIshmem_int8_swap\fP(3), +\fIshmem_real4_swap\fP(3), +\fIshmem_real8_swap\fP(3), +\fIshmem_longlong_swap\fP(3) +\- Performs an atomic swap to a remote data object +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +double shmem_double_swap(double *target, double value, + int pe); + +float shmem_float_swap(float *target, float value, int pe); + +int shmem_int_swap(int *target, int value, int pe); + +long shmem_long_swap(long *target, long value, int pe); + +long long shmem_longlong_swap(long long *target, + long long value, int pe); + +long shmem_swap(long *target, long value, int pe); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +INTEGER pe + +INTEGER SHMEM_SWAP +ires = SHMEM_SWAP(target, value, pe) + +INTEGER(KIND=4) SHMEM_INT4_SWAP +ires = SHMEM_INT4_SWAP(target, value, pe) + +INTEGER(KIND=8) SHMEM_INT8_SWAP +ires = SHMEM_INT8_SWAP(target, value, pe) + +REAL(KIND=4) SHMEM_REAL4_SWAP +res = SHMEM_REAL4_SWAP(target, value, pe) + +REAL(KIND=8) SHMEM_REAL8_SWAP +res = SHMEM_REAL8_SWAP(target, value, pe) +.Ve +.SH DESCRIPTION + +The atomic swap routines write \fBvalue\fP +to address target on PE \fBpe\fP, +and return +the previous contents of \fBtarget\fP +in one atomic operation. +.PP +The arguments are as follows: +.TP +target +The remotely accessible integer data object to be updated on the remote PE. If +you are using C/C++, the type of target should match that implied in the SYNOPSIS section.
If +you are using Fortran, it must be of the following type: +.RS +.TP +\fBSHMEM_SWAP:\fP Integer of default kind +.TP +\fBSHMEM_INT4_SWAP:\fP 4\-byte integer +.TP +\fBSHMEM_INT8_SWAP:\fP 8\-byte integer +.TP +\fBSHMEM_REAL4_SWAP:\fP 4\-byte real +.TP +\fBSHMEM_REAL8_SWAP:\fP 8\-byte real +.RE +.RS +.PP +.RE +.TP +value +Value to be atomically written to the remote PE. value is the same type as target. +.TP +pe +An integer that indicates the PE number on which target is to be updated. If you are +using Fortran, it must be a default integer value. +.PP +.SH NOTES + +The term remotely accessible is defined in \fIintro_shmem\fP(3)\&. +.SH RETURN VALUES + +The contents that had been at the target address on the remote PE prior to the swap is +returned. +.SH SEE ALSO + +\fIintro_shmem\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_test_lock.3in b/oshmem/shmem/man/man3/shmem_test_lock.3in new file mode 100644 index 0000000000..49974c4f17 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_test_lock.3in @@ -0,0 +1 @@ +.so man3/shmem_set_lock.3 \ No newline at end of file diff --git a/oshmem/shmem/man/man3/shmem_udcflush.3in b/oshmem/shmem/man/man3/shmem_udcflush.3in new file mode 100644 index 0000000000..3edeb74023 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_udcflush.3in @@ -0,0 +1,94 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. 
+.TH "SHMEM\\_CACHE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_clear_cache_inv\fP(3), \fIshmem_clear_cache_line_inv\fP(3), +\fIshmem_set_cache_inv\fP(3), +\fIshmem_set_cache_line_inv\fP(3), +\fIshmem_udcflush\fP(3), +\fIshmem_udcflush_line\fP(3) +\- Controls data cache utilities +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_clear_cache_inv(void); +void shmem_clear_cache_line_inv(void *target); +void shmem_set_cache_inv(void); +void shmem_set_cache_line_inv(void *target); +void shmem_udcflush(void); +void shmem_udcflush_line(void *target); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +CALL SHMEM_CLEAR_CACHE_INV +CALL SHMEM_CLEAR_CACHE_LINE_INV(target) +CALL SHMEM_SET_CACHE_INV +CALL SHMEM_SET_CACHE_LINE_INV(target) + +CALL SHMEM_UDCFLUSH +CALL SHMEM_UDCFLUSH_LINE(target) +.Ve +.SH DESCRIPTION + +The following argument is passed to the cache line control routines: +.TP +target +A data object that is local to the processing element (PE). target can be of +any noncharacter type. If you are using Fortran, it can be of any kind. +.PP +\fBshmem_clear_cache_inv\fP +disables automatic cache coherency mode previously +enabled by shmem_set_cache_inv or shmem_set_cache_line_inv. +.PP +\fBshmem_clear_cache_line_inv\fP +disables automatic cache coherency mode for the +cache line associated with the address of \fBtarget\fP +only. +.PP +\fBshmem_set_cache_inv\fP +enables the OpenSHMEM API to automatically decide the +best strategy for cache coherency. +.PP +\fBshmem_set_cache_line_inv\fP +enables automatic cache coherency mode for the +cache line associated with the address of \fBtarget\fP +only. +.PP +\fBshmem_clear_cache_inv\fP +disables automatic cache coherency mode previously +enabled by shmem_set_cache_inv or shmem_set_cache_line_inv. +.PP +\fBshmem_udcflush\fP +makes the entire user data cache coherent. +.PP +\fBshmem_udcflush_line\fP +makes coherent the cache line that corresponds with +the address specified by target.
+.PP +.SH NOTES + +These routines have been retained for improved backward compatibility with legacy +architectures. +.PP +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_put\fP(3), +\fIshmem_swap\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_udcflush_line.3in b/oshmem/shmem/man/man3/shmem_udcflush_line.3in new file mode 100644 index 0000000000..4a6a361ef9 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_udcflush_line.3in @@ -0,0 +1 @@ +.so man3/shmem_udcflush.3 diff --git a/oshmem/shmem/man/man3/shmem_wait.3in b/oshmem/shmem/man/man3/shmem_wait.3in new file mode 100644 index 0000000000..f1c6aa5769 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_wait.3in @@ -0,0 +1,205 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "SHMEM\\_WAIT" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIshmem_int_wait\fP(3), +\fIshmem_int_wait_until\fP(3), +\fIshmem_int4_wait\fP(3), +\fIshmem_int4_wait_until\fP(3), +\fIshmem_int8_wait\fP(3), +\fIshmem_int8_wait_until\fP(3), +\fIshmem_long_wait\fP(3), +\fIshmem_long_wait_until\fP(3), +\fIshmem_longlong_wait\fP(3), +\fIshmem_longlong_wait_until\fP(3), +\fIshmem_short_wait\fP(3), +\fIshmem_short_wait_until\fP(3), +\fIshmem_wait\fP(3), +\fIshmem_wait_until\fP(3) +\- Waits for a variable on the local processing element (PE) to change +.SH SYNOPSIS + +C or C++: +.Vb +#include <mpp/shmem.h> + +void shmem_int_wait(int *var, int value); + +void shmem_int_wait_until(int *var, int cond, int value); + +void shmem_long_wait(long *var, long value); + +void shmem_long_wait_until(long *var, int cond, long value); + +void shmem_longlong_wait(long long *var, long long value); + +void shmem_longlong_wait_until(long long *var, int cond, + long long value); + +void shmem_short_wait(short *var, short value); + +void shmem_short_wait_until(short *var, int cond, + short value); + +void
shmem_wait(long *ivar, long cmp_value); + +void shmem_wait_until(long *ivar, int cmp, long value); +.Ve +Fortran: +.Vb +INCLUDE "mpp/shmem.fh" + +CALL SHMEM_INT4_WAIT(ivar, cmp_value) + +CALL SHMEM_INT4_WAIT_UNTIL(ivar, cmp, cmp_value) + +CALL SHMEM_INT8_WAIT(ivar, cmp_value) + +CALL SHMEM_INT8_WAIT_UNTIL(ivar, cmp, cmp_value) + +CALL SHMEM_WAIT(ivar, cmp_value) + +CALL SHMEM_WAIT_UNTIL(ivar, cmp, cmp_value) +.Ve +.SH DESCRIPTION + +shmem_wait and shmem_wait_until wait for \fBivar\fP +to be changed by a remote write +or atomic swap issued by a different processor. These routines can be used for point\-to\- +point directed synchronization. A call to shmem_wait does not return until some other +processor writes a value, not equal to cmp_value, into \fBivar\fP +on the waiting +processor. A call to shmem_wait_until does not return until some other processor changes +\fBivar\fP +to satisfy the condition implied by cmp and cmp_value. This mechanism is +useful when a processor needs to tell another processor that it has completed some action. +.PP +The arguments are as follows: +.TP +target +The remotely accessible integer data object to be updated on the remote PE. If +you are using C/C++, the type of target should match that implied in the SYNOPSIS section. +If you are using the Fortran compiler, it must be of type integer with an element size of 4 +bytes for SHMEM_INT4_ADD and 8 bytes for SHMEM_INT8_ADD. +.TP +value +The value to be atomically added to target. If you are using C/C++, the type of +value should match that implied in the SYNOPSIS section. If you are using Fortran, it must be +of type integer with an element size of target. +.TP +pe +An integer that indicates the PE number upon which target is to be updated. If you +are using Fortran, it must be a default integer value. +.TP +ivar +A remotely accessible integer variable that is being updated by another PE. If you +are using C/C++, the type of ivar should match that implied in the SYNOPSIS section. 
If you +are using Fortran, ivar must be a specific sized integer type according to +the function being called, as follows: +.RS +.TP +\fBshmem_wait, shmem_wait_until:\fP default INTEGER +.TP +\fBshmem_int4_wait, shmem_int4_wait_until:\fP INTEGER*4 +.TP +\fBshmem_int8_wait, shmem_int8_wait_until:\fP INTEGER*8 +.RE +.RS +.PP +.RE +.TP +cmp +The compare operator that compares ivar with cmp_value. cmp must be of type +integer. If you are using Fortran, it must be of default kind. If you are using C/C++, the type +of cmp should match that implied in the SYNOPSIS section. The following cmp values are +supported: +.RS +.TP +SHMEM_CMP_EQ +Equal +.TP +SHMEM_CMP_NE +Not equal +.TP +SHMEM_CMP_GT +Greater than +.TP +SHMEM_CMP_LE +Less than or equal to +.TP +SHMEM_CMP_LT +Less than +.TP +SHMEM_CMP_GE +Greater than or equal to +.RE +.RS +.PP +.RE +.TP +cmp_value +cmp_value must be of type integer. If you are using C/C++, the type of +cmp_value should match that implied in the SYNOPSIS section. If you are using Fortran, +cmp_value must be an integer of the same size and kind as ivar. +The shmem_wait routines return when ivar is no longer equal to cmp_value. +The shmem_wait_until routines return when the compare condition is true. The compare +condition is defined by the ivar argument compared with the cmp_value using the +comparison operator, cmp.
+.PP +.SH EXAMPLES + +\fBExample 1:\fP +The following call returns when variable ivar is not equal to 100: +.Vb +INTEGER*8 IVAR + +CALL SHMEM_INT8_WAIT(IVAR, INT8(100)) +.Ve +\fBExample 2:\fP +The following call to SHMEM_INT8_WAIT_UNTIL is equivalent to the +call to SHMEM_INT8_WAIT in example 1: +.Vb +INTEGER*8 IVAR + +CALL SHMEM_INT8_WAIT_UNTIL(IVAR, SHMEM_CMP_NE, INT8(100)) +.Ve +\fBExample 3:\fP +The following C/C++ call waits until the sign bit in ivar is set by a +transfer from a remote PE: +.Vb +int ivar; + +shmem_int_wait_until(&ivar, SHMEM_CMP_LT, 0); +.Ve +\fBExample 4:\fP +The following Fortran example is in the context of a subroutine: +.Vb +SUBROUTINE EXAMPLE() + INTEGER FLAG_VAR + COMMON/FLAG/FLAG_VAR + . . . + FLAG_VAR = FLAG_VALUE ! initialize the event variable + . . . + IF (FLAG_VAR .EQ. FLAG_VALUE) THEN + CALL SHMEM_WAIT(FLAG_VAR, FLAG_VALUE) + ENDIF + FLAG_VAR = FLAG_VALUE ! reset the event variable for next time + . . . +END +.Ve +.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_put\fP(3) diff --git a/oshmem/shmem/man/man3/shmem_wait_until.3in b/oshmem/shmem/man/man3/shmem_wait_until.3in new file mode 100644 index 0000000000..03267ffbc5 --- /dev/null +++ b/oshmem/shmem/man/man3/shmem_wait_until.3in @@ -0,0 +1 @@ +.so man3/shmem_wait.3 diff --git a/oshmem/shmem/man/man3/shmemalign.3in b/oshmem/shmem/man/man3/shmemalign.3in new file mode 100644 index 0000000000..63a8ff4e8e --- /dev/null +++ b/oshmem/shmem/man/man3/shmemalign.3in @@ -0,0 +1 @@ +.so man3/shmalloc.3 diff --git a/oshmem/shmem/man/man3/shrealloc.3in b/oshmem/shmem/man/man3/shrealloc.3in new file mode 100644 index 0000000000..63a8ff4e8e --- /dev/null +++ b/oshmem/shmem/man/man3/shrealloc.3in @@ -0,0 +1 @@ +.so man3/shmalloc.3 diff --git a/oshmem/shmem/man/man3/start_pes.3in b/oshmem/shmem/man/man3/start_pes.3in new file mode 100644 index 0000000000..0901e38302 --- /dev/null +++ b/oshmem/shmem/man/man3/start_pes.3in @@ -0,0 +1,82 @@ +.\" -*- nroff -*- +.\" Copyright (c) 2015 University of 
Houston. All rights reserved. +.\" Copyright (c) 2015 Mellanox Technologies, Inc. +.\" $COPYRIGHT$ +.de Vb +.ft CW +.nf +.. +.de Ve +.ft R + +.fi +.. +.TH "START\\_PES" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" +.SH NAME + +\fIstart_pes\fP(3) +\- Initializes the SHMEM API; must be called before any other SHMEM routine. +.SH SYNOPSIS + +C or C++: +.Vb +void start_pes(int npes); +.Ve +Fortran: +.Vb +CALL START_PES(npes) +.Ve +.SH DESCRIPTION + +The start_pes routine should be the first statement in a SHMEM parallel program. +.PP +The start_pes routine accepts the following argument: +.TP +npes +Unused. Should be set to 0. +.PP +This routine initializes the SHMEM API, therefore it must be called before calling any +other SHMEM routine. +This routine is responsible inter alia for setting up the symmetric heap on the calling PE, and +the creation of the virtual PE numbers. Upon successful return from this routine, the calling PE +will be able to communicate with and transfer data to other PEs. +.PP +Multiple calls to this function are not allowed. +.PP +For an overview of programming with SHMEM communication routines, example SHMEM +programs, and instructions for compiling SHMEM programs, see the \fIintro_shmem\fP(3) +man page. +.SH EXAMPLES + +This is a simple program that calls \fIshmem_integer_put\fP(3): +.Vb +PROGRAM PUT + INCLUDE "mpp/shmem.fh" + + INTEGER TARG, SRC, RECEIVER, BAR + COMMON /T/ TARG + PARAMETER (RECEIVER=1) + + CALL START_PES(0) + IF (MY_PE() .EQ. 0) THEN + SRC = 33 + CALL SHMEM_INTEGER_PUT(TARG, SRC, 1, RECEIVER) + ENDIF + CALL SHMEM_BARRIER_ALL ! SYNCHRONIZES SENDER AND RECEIVER + IF (MY_PE() .EQ. RECEIVER) THEN + PRINT *,'PE ', MY_PE(),' TARG=',TARG,' (expect 33)' + ENDIF +END +.Ve +.SH NOTES + +If the start_pes call is not the first statement in a program, unexpected results may occur on +some architectures.
+.SH SEE ALSO + +\fIintro_shmem\fP(3), +\fIshmem_barrier\fP(3), +\fIshmem_barrier_all\fP(3), +\fIshmem_put\fP(3), +\fI_my_pe\fP(3), +\fI_num_pes\fP(3)