diff --git a/oshmem/Makefile.am b/oshmem/Makefile.am
index 4fe3fb418c..ce2ffac368 100644
--- a/oshmem/Makefile.am
+++ b/oshmem/Makefile.am
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013      Mellanox Technologies, Inc.
+# Copyright (c) 2013-2015 Mellanox Technologies, Inc.
 #                         All rights reserved.
 # Copyright (c) 2013-2014 Cisco Systems, Inc.  All rights reserved.
 # Copyright (c) 2014      Intel, Inc.  All rights reserved.
@@ -94,6 +94,25 @@ include proc/Makefile.am
 include request/Makefile.am
 include runtime/Makefile.am
 include shmem/Makefile.am
+include shmem/man/man3/Makefile.extra
 include mca/Makefile.am
 include tools/Makefile.am
 include util/Makefile.am
+
+# Ensure that the man page directory exists before we try to make man
+# page files (because oshmem/shmem/man/man3 has no config.status-generated
+# Makefile)
+dir_stamp = $(top_builddir)/$(subdir)/shmem/man/man3/.dir-stamp
+
+# Also ensure that the man pages are rebuilt if the opal_config.h file
+# changes (e.g., configure was run again, meaning that the release
+# date or version may have changed)
+$(nodist_man_MANS): $(dir_stamp) $(top_builddir)/opal/include/opal_config.h
+
+$(dir_stamp):
+	$(MKDIR_P) `dirname $@`
+	touch "$@"
+
+# Remove the generated man pages and the directory stamp file
+distclean-local:
+	rm -f $(nodist_man_MANS) $(dir_stamp)
diff --git a/oshmem/shmem/Makefile.am b/oshmem/shmem/Makefile.am
index b8317e4f7b..14d9e4d9ff 100644
--- a/oshmem/shmem/Makefile.am
+++ b/oshmem/shmem/Makefile.am
@@ -7,6 +7,8 @@
 # $HEADER$
 #
 
+EXTRA_DIST =
+
 headers += shmem/shmem_api_logger.h \
            shmem/shmem_lock.h
 
diff --git a/oshmem/shmem/man/man3/Makefile.extra b/oshmem/shmem/man/man3/Makefile.extra
new file mode 100644
index 0000000000..965bbbb97c
--- /dev/null
+++ b/oshmem/shmem/man/man3/Makefile.extra
@@ -0,0 +1,180 @@
+# -*- makefile -*-
+# Copyright (c) 2015      Mellanox Technologies, Inc.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+include $(top_srcdir)/Makefile.ompi-rules
+
+shmem_api_man_pages = \
+        shmem/man/man3/intro_shmem.3 \
+        shmem/man/man3/OpenSHMEM.3 \
+        shmem/man/man3/start_pes.3 \
+        shmem/man/man3/_num_pes.3 \
+        shmem/man/man3/_my_pe.3 \
+        shmem/man/man3/shmem_pe_accessible.3 \
+        shmem/man/man3/shmem_addr_accessible.3 \
+        shmem/man/man3/shmalloc.3 \
+        shmem/man/man3/shmemalign.3 \
+        shmem/man/man3/shrealloc.3 \
+        shmem/man/man3/shfree.3 \
+        shmem/man/man3/shmem_ptr.3 \
+        shmem/man/man3/shmem_char_p.3 \
+        shmem/man/man3/shmem_short_p.3 \
+        shmem/man/man3/shmem_int_p.3 \
+        shmem/man/man3/shmem_long_p.3 \
+        shmem/man/man3/shmem_float_p.3 \
+        shmem/man/man3/shmem_double_p.3 \
+        shmem/man/man3/shmem_longlong_p.3 \
+        shmem/man/man3/shmem_longdouble_p.3 \
+        shmem/man/man3/shmem_char_put.3 \
+        shmem/man/man3/shmem_short_put.3 \
+        shmem/man/man3/shmem_int_put.3 \
+        shmem/man/man3/shmem_long_put.3 \
+        shmem/man/man3/shmem_float_put.3 \
+        shmem/man/man3/shmem_double_put.3 \
+        shmem/man/man3/shmem_longlong_put.3 \
+        shmem/man/man3/shmem_longdouble_put.3 \
+        shmem/man/man3/shmem_put32.3 \
+        shmem/man/man3/shmem_put64.3 \
+        shmem/man/man3/shmem_put128.3 \
+        shmem/man/man3/shmem_putmem.3 \
+        shmem/man/man3/shmem_short_iput.3 \
+        shmem/man/man3/shmem_int_iput.3 \
+        shmem/man/man3/shmem_long_iput.3 \
+        shmem/man/man3/shmem_float_iput.3 \
+        shmem/man/man3/shmem_double_iput.3 \
+        shmem/man/man3/shmem_longlong_iput.3 \
+        shmem/man/man3/shmem_longdouble_iput.3 \
+        shmem/man/man3/shmem_iput32.3 \
+        shmem/man/man3/shmem_iput64.3 \
+        shmem/man/man3/shmem_iput128.3 \
+        shmem/man/man3/shmem_char_g.3 \
+        shmem/man/man3/shmem_short_g.3 \
+        shmem/man/man3/shmem_int_g.3 \
+        shmem/man/man3/shmem_long_g.3 \
+        shmem/man/man3/shmem_float_g.3 \
+        shmem/man/man3/shmem_double_g.3 \
+        shmem/man/man3/shmem_longlong_g.3 \
+        shmem/man/man3/shmem_longdouble_g.3 \
+        shmem/man/man3/shmem_char_get.3 \
+        shmem/man/man3/shmem_short_get.3 \
+        shmem/man/man3/shmem_int_get.3 \
+        shmem/man/man3/shmem_long_get.3 \
+        shmem/man/man3/shmem_float_get.3 \
+        shmem/man/man3/shmem_double_get.3 \
+        shmem/man/man3/shmem_longlong_get.3 \
+        shmem/man/man3/shmem_longdouble_get.3 \
+        shmem/man/man3/shmem_get32.3 \
+        shmem/man/man3/shmem_get64.3 \
+        shmem/man/man3/shmem_get128.3 \
+        shmem/man/man3/shmem_getmem.3 \
+        shmem/man/man3/shmem_short_iget.3 \
+        shmem/man/man3/shmem_int_iget.3 \
+        shmem/man/man3/shmem_long_iget.3 \
+        shmem/man/man3/shmem_float_iget.3 \
+        shmem/man/man3/shmem_double_iget.3 \
+        shmem/man/man3/shmem_longlong_iget.3 \
+        shmem/man/man3/shmem_longdouble_iget.3 \
+        shmem/man/man3/shmem_iget32.3 \
+        shmem/man/man3/shmem_iget64.3 \
+        shmem/man/man3/shmem_iget128.3 \
+        shmem/man/man3/shmem_swap.3 \
+        shmem/man/man3/shmem_int_swap.3 \
+        shmem/man/man3/shmem_long_swap.3 \
+        shmem/man/man3/shmem_longlong_swap.3 \
+        shmem/man/man3/shmem_float_swap.3 \
+        shmem/man/man3/shmem_double_swap.3 \
+        shmem/man/man3/shmem_int_cswap.3 \
+        shmem/man/man3/shmem_long_cswap.3 \
+        shmem/man/man3/shmem_longlong_cswap.3 \
+        shmem/man/man3/shmem_int_fadd.3 \
+        shmem/man/man3/shmem_long_fadd.3 \
+        shmem/man/man3/shmem_longlong_fadd.3 \
+        shmem/man/man3/shmem_int_finc.3 \
+        shmem/man/man3/shmem_long_finc.3 \
+        shmem/man/man3/shmem_longlong_finc.3 \
+        shmem/man/man3/shmem_int_add.3 \
+        shmem/man/man3/shmem_long_add.3 \
+        shmem/man/man3/shmem_longlong_add.3 \
+        shmem/man/man3/shmem_int_inc.3 \
+        shmem/man/man3/shmem_long_inc.3 \
+        shmem/man/man3/shmem_longlong_inc.3 \
+        shmem/man/man3/shmem_set_lock.3 \
+        shmem/man/man3/shmem_clear_lock.3 \
+        shmem/man/man3/shmem_test_lock.3 \
+        shmem/man/man3/shmem_wait.3 \
+        shmem/man/man3/shmem_short_wait.3 \
+        shmem/man/man3/shmem_int_wait.3 \
+        shmem/man/man3/shmem_long_wait.3 \
+        shmem/man/man3/shmem_longlong_wait.3 \
+        shmem/man/man3/shmem_wait_until.3 \
+        shmem/man/man3/shmem_short_wait_until.3 \
+        shmem/man/man3/shmem_int_wait_until.3 \
+        shmem/man/man3/shmem_long_wait_until.3 \
+        shmem/man/man3/shmem_longlong_wait_until.3 \
+        shmem/man/man3/shmem_barrier.3 \
+        shmem/man/man3/shmem_barrier_all.3 \
+        shmem/man/man3/shmem_fence.3 \
+        shmem/man/man3/shmem_quiet.3 \
+        shmem/man/man3/shmem_broadcast32.3 \
+        shmem/man/man3/shmem_broadcast64.3 \
+        shmem/man/man3/shmem_collect32.3 \
+        shmem/man/man3/shmem_collect64.3 \
+        shmem/man/man3/shmem_fcollect32.3 \
+        shmem/man/man3/shmem_fcollect64.3 \
+        shmem/man/man3/shmem_short_and_to_all.3 \
+        shmem/man/man3/shmem_int_and_to_all.3 \
+        shmem/man/man3/shmem_long_and_to_all.3 \
+        shmem/man/man3/shmem_longlong_and_to_all.3 \
+        shmem/man/man3/shmem_short_or_to_all.3 \
+        shmem/man/man3/shmem_int_or_to_all.3 \
+        shmem/man/man3/shmem_long_or_to_all.3 \
+        shmem/man/man3/shmem_longlong_or_to_all.3 \
+        shmem/man/man3/shmem_short_xor_to_all.3 \
+        shmem/man/man3/shmem_int_xor_to_all.3 \
+        shmem/man/man3/shmem_long_xor_to_all.3 \
+        shmem/man/man3/shmem_longlong_xor_to_all.3 \
+        shmem/man/man3/shmem_short_max_to_all.3 \
+        shmem/man/man3/shmem_int_max_to_all.3 \
+        shmem/man/man3/shmem_long_max_to_all.3 \
+        shmem/man/man3/shmem_longlong_max_to_all.3 \
+        shmem/man/man3/shmem_float_max_to_all.3 \
+        shmem/man/man3/shmem_double_max_to_all.3 \
+        shmem/man/man3/shmem_longdouble_max_to_all.3 \
+        shmem/man/man3/shmem_short_min_to_all.3 \
+        shmem/man/man3/shmem_int_min_to_all.3 \
+        shmem/man/man3/shmem_long_min_to_all.3 \
+        shmem/man/man3/shmem_longlong_min_to_all.3 \
+        shmem/man/man3/shmem_float_min_to_all.3 \
+        shmem/man/man3/shmem_double_min_to_all.3 \
+        shmem/man/man3/shmem_longdouble_min_to_all.3 \
+        shmem/man/man3/shmem_short_sum_to_all.3 \
+        shmem/man/man3/shmem_int_sum_to_all.3 \
+        shmem/man/man3/shmem_long_sum_to_all.3 \
+        shmem/man/man3/shmem_longlong_sum_to_all.3 \
+        shmem/man/man3/shmem_float_sum_to_all.3 \
+        shmem/man/man3/shmem_double_sum_to_all.3 \
+        shmem/man/man3/shmem_complexf_sum_to_all.3 \
+        shmem/man/man3/shmem_complexd_sum_to_all.3 \
+        shmem/man/man3/shmem_short_prod_to_all.3 \
+        shmem/man/man3/shmem_int_prod_to_all.3 \
+        shmem/man/man3/shmem_long_prod_to_all.3 \
+        shmem/man/man3/shmem_longlong_prod_to_all.3 \
+        shmem/man/man3/shmem_float_prod_to_all.3 \
+        shmem/man/man3/shmem_double_prod_to_all.3 \
+        shmem/man/man3/shmem_longdouble_prod_to_all.3 \
+        shmem/man/man3/shmem_complexf_prod_to_all.3 \
+        shmem/man/man3/shmem_complexd_prod_to_all.3 \
+        shmem/man/man3/shmem_udcflush.3 \
+        shmem/man/man3/shmem_udcflush_line.3 \
+        shmem/man/man3/shmem_set_cache_inv.3 \
+        shmem/man/man3/shmem_set_cache_line_inv.3 \
+        shmem/man/man3/shmem_clear_cache_inv.3 \
+        shmem/man/man3/shmem_clear_cache_line_inv.3
+# The generated .3 pages are installed but not distributed; their .3in sources are distributed
+nodist_man_MANS += $(shmem_api_man_pages)
+EXTRA_DIST += $(shmem_api_man_pages:.3=.3in)
diff --git a/oshmem/shmem/man/man3/OpenSHMEM.3in b/oshmem/shmem/man/man3/OpenSHMEM.3in
new file mode 100644
index 0000000000..97469c502b
--- /dev/null
+++ b/oshmem/shmem/man/man3/OpenSHMEM.3in
@@ -0,0 +1 @@
+.so man3/intro_shmem.3
diff --git a/oshmem/shmem/man/man3/_my_pe.3in b/oshmem/shmem/man/man3/_my_pe.3in
new file mode 100644
index 0000000000..ddc43b4ff7
--- /dev/null
+++ b/oshmem/shmem/man/man3/_my_pe.3in
@@ -0,0 +1,41 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "MY\\_PE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+my_pe, _my_pe, shmem_my_pe \- Returns the virtual PE number of the calling PE.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+int _my_pe (void);
+int shmem_my_pe (void);
+.Ve
+Fortran:
+.Vb
+include 'mpp/shmem.fh'
+I = MY_PE ()
+I = SHMEM_MY_PE ()
+.Ve
+.SH DESCRIPTION
+
+my_pe() and shmem_my_pe() return the processing element (PE) number of the calling PE. They accept no
+arguments. The result is an integer between 0 and npes \- 1, where npes is the total
+number of PEs executing the current program.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fI_num_pes\fP(3),
+\fIstart_pes\fP(3)
diff --git a/oshmem/shmem/man/man3/_num_pes.3in b/oshmem/shmem/man/man3/_num_pes.3in
new file mode 100644
index 0000000000..21ab515dfd
--- /dev/null
+++ b/oshmem/shmem/man/man3/_num_pes.3in
@@ -0,0 +1,39 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "NUM\\_PES" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+num_pes, _num_pes, shmem_n_pes \- Returns the number of processing elements (PEs) used to run the application.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+int _num_pes (void);
+int shmem_n_pes (void);
+.Ve
+Fortran:
+.Vb
+include 'mpp/shmem.fh'
+I = NUM_PES ()
+I = SHMEM_N_PES ()
+.Ve
+.SH DESCRIPTION
+
+num_pes() and shmem_n_pes() return the total number of PEs running in an application.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fI_my_pe\fP(3),
+\fIstart_pes\fP(3)
diff --git a/oshmem/shmem/man/man3/intro_shmem.3in b/oshmem/shmem/man/man3/intro_shmem.3in
new file mode 100644
index 0000000000..caef30b881
--- /dev/null
+++ b/oshmem/shmem/man/man3/intro_shmem.3in
@@ -0,0 +1,1312 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "INTRO\\_SHMEM" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+intro_shmem \- Introduction to the OpenSHMEM programming model
+.PP
+.SH DESCRIPTION
+
+The SHMEM programming model consists of library routines that provide low\-latency,
+high\-bandwidth communication for use in highly parallelized scalable programs. The
+routines in the OpenSHMEM application programming interface (API) provide a programming
+model for exchanging data between cooperating parallel processes. The resulting programs
+are similar in style to Message Passing Interface (MPI) programs. The SHMEM API can
+be used either alone or in combination with MPI routines in the same parallel program.
+.PP
+An OpenSHMEM program is SPMD (single program, multiple data) in style. The SHMEM
+processes, called processing elements or PEs, all start at the same time and they all run the
+same program. Usually the PEs perform computation on their own subdomains of the larger
+problem and periodically communicate with other PEs to exchange information on
+which the next computation phase depends.
+.PP
+The OpenSHMEM routines minimize the overhead associated with data transfer requests,
+maximize bandwidth and minimize data latency. Data latency is the period of time that
+starts when a PE initiates a transfer of data and ends when a PE can use the data.
+OpenSHMEM routines support remote data transfer through put operations, which transfer
+data to a different PE, get operations, which transfer data from a different PE, and remote
+pointers, which allow direct references to data objects owned by another PE. Other
+operations supported are collective broadcast and reduction, barrier synchronization, and
+atomic memory operations. An atomic memory operation is an atomic read\-and\-update
+operation, such as a fetch\-and\-increment, on a remote or local data object.
+.PP
+.SH OPENSHMEM ROUTINES
+
+This section lists the significant OpenSHMEM message\-passing routines.
+.TP
+PE queries
+.PP
+.RS
+.TP
+.B *
+C/C++ only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fI_num_pes\fP(3)
+.TP
+.B *
+\fI_my_pe\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fINUM_PES\fP(3)
+.TP
+.B *
+\fIMY_PE\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Elemental data put routines
+.PP
+.RS
+.TP
+.B *
+C/C++ only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_double_p\fP(3)
+.TP
+.B *
+\fIshmem_float_p\fP(3)
+.TP
+.B *
+\fIshmem_int_p\fP(3)
+.TP
+.B *
+\fIshmem_long_p\fP(3)
+.TP
+.B *
+\fIshmem_short_p\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Block data put routines
+.PP
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_put32\fP(3)
+.TP
+.B *
+\fIshmem_put64\fP(3)
+.TP
+.B *
+\fIshmem_put128\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+C/C++ only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_double_put\fP(3)
+.TP
+.B *
+\fIshmem_float_put\fP(3)
+.TP
+.B *
+\fIshmem_int_put\fP(3)
+.TP
+.B *
+\fIshmem_long_put\fP(3)
+.TP
+.B *
+\fIshmem_short_put\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_complex_put\fP(3)
+.TP
+.B *
+\fIshmem_integer_put\fP(3)
+.TP
+.B *
+\fIshmem_logical_put\fP(3)
+.TP
+.B *
+\fIshmem_real_put\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Elemental data get routines
+.PP
+.RS
+.TP
+.B *
+C/C++ only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_double_g\fP(3)
+.TP
+.B *
+\fIshmem_float_g\fP(3)
+.TP
+.B *
+\fIshmem_int_g\fP(3)
+.TP
+.B *
+\fIshmem_long_g\fP(3)
+.TP
+.B *
+\fIshmem_short_g\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Block data get routines
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_get32\fP(3)
+.TP
+.B *
+\fIshmem_get64\fP(3)
+.TP
+.B *
+\fIshmem_get128\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+C/C++ only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_double_get\fP(3)
+.TP
+.B *
+\fIshmem_float_get\fP(3)
+.TP
+.B *
+\fIshmem_int_get\fP(3)
+.TP
+.B *
+\fIshmem_long_get\fP(3)
+.TP
+.B *
+\fIshmem_short_get\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_complex_get\fP(3)
+.TP
+.B *
+\fIshmem_integer_get\fP(3)
+.TP
+.B *
+\fIshmem_logical_get\fP(3)
+.TP
+.B *
+\fIshmem_real_get\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Strided put routines
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_iput32\fP(3)
+.TP
+.B *
+\fIshmem_iput64\fP(3)
+.TP
+.B *
+\fIshmem_iput128\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+C/C++ only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_double_iput\fP(3)
+.TP
+.B *
+\fIshmem_float_iput\fP(3)
+.TP
+.B *
+\fIshmem_int_iput\fP(3)
+.TP
+.B *
+\fIshmem_long_iput\fP(3)
+.TP
+.B *
+\fIshmem_short_iput\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_complex_iput\fP(3)
+.TP
+.B *
+\fIshmem_integer_iput\fP(3)
+.TP
+.B *
+\fIshmem_logical_iput\fP(3)
+.TP
+.B *
+\fIshmem_real_iput\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Strided get routines
+.PP
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_iget32\fP(3)
+.TP
+.B *
+\fIshmem_iget64\fP(3)
+.TP
+.B *
+\fIshmem_iget128\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+C/C++ only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_double_iget\fP(3)
+.TP
+.B *
+\fIshmem_float_iget\fP(3)
+.TP
+.B *
+\fIshmem_int_iget\fP(3)
+.TP
+.B *
+\fIshmem_long_iget\fP(3)
+.TP
+.B *
+\fIshmem_short_iget\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_complex_iget\fP(3)
+.TP
+.B *
+\fIshmem_integer_iget\fP(3)
+.TP
+.B *
+\fIshmem_logical_iget\fP(3)
+.TP
+.B *
+\fIshmem_real_iget\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Point\-to\-point synchronization routines
+.RS
+.TP
+.B *
+C/C++ only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_int_wait\fP(3)
+.TP
+.B *
+\fIshmem_int_wait_until\fP(3)
+.TP
+.B *
+\fIshmem_long_wait\fP(3)
+.TP
+.B *
+\fIshmem_long_wait_until\fP(3)
+.TP
+.B *
+\fIshmem_longlong_wait\fP(3)
+.TP
+.B *
+\fIshmem_longlong_wait_until\fP(3)
+.TP
+.B *
+\fIshmem_short_wait\fP(3)
+.TP
+.B *
+\fIshmem_short_wait_until\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_int4_wait\fP(3)
+.TP
+.B *
+\fIshmem_int4_wait_until\fP(3)
+.TP
+.B *
+\fIshmem_int8_wait\fP(3)
+.TP
+.B *
+\fIshmem_int8_wait_until\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Barrier synchronization routines
+.PP
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_barrier_all\fP(3)
+.TP
+.B *
+\fIshmem_barrier\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Atomic memory fetch\-and\-operate (fetch\-op) routines
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.TP
+.B *
+shmem_swap
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Reduction routines
+.RS
+.TP
+.B *
+C/C++ only:
+.RS
+.TP
+.B *
+\fIshmem_int_and_to_all\fP(3)
+.TP
+.B *
+\fIshmem_long_and_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longlong_and_to_all\fP(3)
+.TP
+.B *
+\fIshmem_short_and_to_all\fP(3)
+.TP
+.B *
+\fIshmem_double_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_float_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_long_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longlong_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_short_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_double_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_float_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_long_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longlong_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_short_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_double_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_float_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_long_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longlong_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_short_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_double_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_float_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_long_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longlong_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_short_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int_or_to_all\fP(3)
+.TP
+.B *
+\fIshmem_long_or_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longlong_or_to_all\fP(3)
+.TP
+.B *
+\fIshmem_short_or_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int_xor_to_all\fP(3)
+.TP
+.B *
+\fIshmem_long_xor_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longlong_xor_to_all\fP(3)
+.TP
+.B *
+\fIshmem_short_xor_to_all\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.TP
+.B *
+\fIshmem_int4_and_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int8_and_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real4_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real8_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int4_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int8_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real4_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real8_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int4_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int8_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real4_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real8_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int4_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int8_sum_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real4_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real8_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int4_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int8_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int4_or_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int8_or_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int4_xor_to_all\fP(3)
+.TP
+.B *
+\fIshmem_int8_xor_to_all\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Broadcast routines
+.PP
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_broadcast32\fP(3)
+.TP
+.B *
+\fIshmem_broadcast64\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Cache management routines
+.PP
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_udcflush\fP(3)
+.TP
+.B *
+\fIshmem_udcflush_line\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Byte\-granularity block put and get routines
+.PP
+.RS
+.TP
+.B *
+C/C++ and Fortran
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_putmem\fP(3)
+.TP
+.B *
+\fIshmem_getmem\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_character_put\fP(3)
+.TP
+.B *
+\fIshmem_character_get\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Collect routines
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_collect32\fP(3)
+.TP
+.B *
+\fIshmem_collect64\fP(3)
+.TP
+.B *
+\fIshmem_fcollect32\fP(3)
+.TP
+.B *
+\fIshmem_fcollect64\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Atomic memory fetch\-and\-operate (fetch\-op) routines
+.RS
+.TP
+.B *
+C/C++ only:
+.RS
+.TP
+.B *
+\fIshmem_double_swap\fP(3)
+.TP
+.B *
+\fIshmem_float_swap\fP(3)
+.TP
+.B *
+\fIshmem_int_cswap\fP(3)
+.TP
+.B *
+\fIshmem_int_fadd\fP(3)
+.TP
+.B *
+\fIshmem_int_finc\fP(3)
+.TP
+.B *
+\fIshmem_int_swap\fP(3)
+.TP
+.B *
+\fIshmem_long_cswap\fP(3)
+.TP
+.B *
+\fIshmem_long_fadd\fP(3)
+.TP
+.B *
+\fIshmem_long_finc\fP(3)
+.TP
+.B *
+\fIshmem_long_swap\fP(3)
+.TP
+.B *
+\fIshmem_longlong_cswap\fP(3)
+.TP
+.B *
+\fIshmem_longlong_fadd\fP(3)
+.TP
+.B *
+\fIshmem_longlong_finc\fP(3)
+.TP
+.B *
+\fIshmem_longlong_swap\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.TP
+.B *
+\fIshmem_int4_cswap\fP(3)
+.TP
+.B *
+\fIshmem_int4_fadd\fP(3)
+.TP
+.B *
+\fIshmem_int4_finc\fP(3)
+.TP
+.B *
+\fIshmem_int4_swap\fP(3)
+.TP
+.B *
+\fIshmem_int8_swap\fP(3)
+.TP
+.B *
+\fIshmem_real4_swap\fP(3)
+.TP
+.B *
+\fIshmem_real8_swap\fP(3)
+.TP
+.B *
+\fIshmem_int8_cswap\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Atomic memory operation routines
+.RS
+.TP
+.B *
+Fortran only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_int4_add\fP(3)
+.TP
+.B *
+\fIshmem_int4_inc\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Remote memory pointer function
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_ptr\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Reduction routines
+.RS
+.TP
+.B *
+C/C++ only:
+.RS
+.TP
+.B *
+\fIshmem_longdouble_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longdouble_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longdouble_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_longdouble_sum_to_all\fP(3)
+.RE
+.RS
+.PP
+.RE
+.TP
+.B *
+Fortran only:
+.RS
+.PP
+.RS
+.RE
+.TP
+.B *
+\fIshmem_real16_max_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real16_min_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real16_prod_to_all\fP(3)
+.TP
+.B *
+\fIshmem_real16_sum_to_all\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Accessibility query routines
+.RS
+.TP
+.B *
+C/C++ and Fortran:
+.RS
+.TP
+.B *
+\fIshmem_pe_accessible\fP(3)
+.TP
+.B *
+\fIshmem_addr_accessible\fP(3)
+.RE
+.RS
+.PP
+.RE
+.RE
+.PP
+.RE
+.TP
+Symmetric Data Objects
+.PP
+Consistent with the SPMD nature of the OpenSHMEM programming model is the
+concept of symmetric data objects. These are arrays or variables that
+exist with the same size, type, and relative address on all PEs.
+Another term for symmetric data objects is "remotely accessible data objects".
+In the interface definitions for OpenSHMEM data transfer routines, one or more of the
+parameters are typically required to be symmetric or remotely accessible.
+.PP
+The following kinds of data objects are symmetric:
+.RS
+.TP
+.B *
+Fortran data objects in common blocks or with the SAVE attribute. These data
+objects must not be defined in a dynamic shared object (DSO).
+.TP
+.B *
+Non\-stack C and C++ variables. These data objects must not be defined in a DSO.
+.TP
+.B *
+Fortran arrays allocated with \fIshpalloc\fP(3F)
+.TP
+.B *
+C and C++ data allocated by \fIshmalloc\fP(3C)
+.RE
+.RS
+.PP
+.RE
+.TP
+Collective Routines
+Some SHMEM routines, for example, \fIshmem_broadcast\fP(3)
+and
+\fIshmem_float_sum_to_all\fP(3),
+are classified as collective routines
+because they distribute work across a set of PEs.
+They must be called concurrently by all PEs in the active set defined by the PE_start,
+logPE_stride, PE_size argument triplet. The following man pages describe the OpenSHMEM
+collective routines:
+.RS
+.TP
+.B *
+\fIshmem_and\fP(3)
+.TP
+.B *
+\fIshmem_barrier\fP(3)
+.TP
+.B *
+\fIshmem_broadcast\fP(3)
+.TP
+.B *
+\fIshmem_collect\fP(3)
+.TP
+.B *
+\fIshmem_max\fP(3)
+.TP
+.B *
+\fIshmem_min\fP(3)
+.TP
+.B *
+\fIshmem_or\fP(3)
+.TP
+.B *
+\fIshmem_prod\fP(3)
+.TP
+.B *
+\fIshmem_sum\fP(3)
+.TP
+.B *
+\fIshmem_xor\fP(3)
+.RE
+.RS
+.PP
+.RE
+.PP
+.SH USING THE SYMMETRIC WORK ARRAY, PSYNC
+
+Multiple pSync arrays are often needed if a particular PE calls an OpenSHMEM collective
+routine twice without intervening barrier synchronization. Problems would occur if some PEs
+in the active set for call 2 arrive at call 2 before processing of call 1 is complete by all PEs in
+the call 1 active set. You can use \fIshmem_barrier\fP(3)
+or \fIshmem_barrier_all\fP(3)
+to perform a barrier synchronization between consecutive calls to OpenSHMEM collective
+routines.
+.PP
+There are two special cases:
+.RE
+.TP
+.B *
+The \fIshmem_barrier\fP(3) routine allows the same pSync array to be used on
+consecutive calls as long as the active PE set does not change.
+.TP
+.B *
+If the same collective routine is called multiple times with the same active set, the
+calls may alternate between two pSync arrays. The SHMEM routines guarantee that a
+first call is completely finished by all PEs by the time processing of a third call begins
+on any PE.
+.PP
+Because the SHMEM routines restore pSync to its original contents, multiple calls that
+use the same pSync array do not require that pSync be reinitialized after the first call.
+.PP
+.SH SHMEM ENVIRONMENT VARIABLES
+
+This section lists the significant SHMEM environment variables.
+.TP
+.B *
+\fBSMA_VERSION\fP print the library version at start\-up.
+.TP
+.B *
+\fBSMA_INFO\fP print helpful text about all these environment variables.
+.TP
+.B *
+\fBSMA_SYMMETRIC_SIZE\fP number of bytes to allocate for the symmetric heap.
+.TP
+.B *
+\fBSMA_DEBUG\fP enable debugging messages.
+.PP
+The first call to SHMEM must be \fIstart_pes\fP(3)\&.
+This routine initializes the SHMEM runtime.
+.PP
+Calling any other SHMEM routines beforehand has undefined behavior. Multiple calls
+to this routine are not allowed.
+.PP
+.SH COMPILING AND RUNNING OPENSHMEM PROGRAMS
+
+The OpenSHMEM specification is silent regarding how OpenSHMEM programs are compiled,
+linked and run. This section shows some examples of how wrapper programs could be utilized
+to compile and launch applications. The commands are styled after wrapper programs
+found in many MPI implementations.
+.PP
+The following sample command lines demonstrate compiling an OpenSHMEM program using wrapper compilers (\fBoshcc\fP and \fBoshfort\fP
+in this case):
+.PP
+.TP
+.B *
+C/C++:
+.Vb
+oshcc c_program.c
+.Ve
+.TP
+.B *
+FORTRAN:
+.Vb
+oshfort fortran_program.f
+.Ve
+.PP
+The following sample command line demonstrates running an OpenSHMEM Program assuming that the library provides a wrapper script for such purpose
+(named \fBoshrun\fP
+for this example):
+.PP
+.Vb
+oshrun \-np 32 ./a.out
+.Ve
+.PP
+.SH EXAMPLES
+
+\fBExample 1\fP:
+The following Fortran OpenSHMEM program directs all PEs to sum
+simultaneously the numbers in the VALUES variable across all PEs:
+.Vb
+PROGRAM REDUCTION
+  REAL VALUES, SUM
+  COMMON /C/ VALUES
+  REAL WORK
+
+  CALL START_PES(0)
+  VALUES = MY_PE()
+  CALL SHMEM_BARRIER_ALL ! Synchronize all PEs
+  SUM = 0.0
+  DO I = 0, NUM_PES()\-1
+    CALL SHMEM_REAL_GET(WORK, VALUES, 1, I) ! Get next value
+    SUM = SUM + WORK                ! Sum it
+  ENDDO
+  PRINT *, 'PE ', MY_PE(), ' COMPUTED SUM=', SUM
+  CALL SHMEM_BARRIER_ALL
+END
+.Ve
+\fBExample 2\fP:
+The following C OpenSHMEM program transfers an array of 10 longs from
+PE 0 to PE 1:
+.Vb
+#include <mpp/shmem.h>
+
+main() {
+  long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+  static long target[10];
+
+  start_pes(0);
+  if (_my_pe() == 0) {
+    /* put 10 elements into target on PE 1 */
+    shmem_long_put(target, source, 10, 1);
+  }
+  shmem_barrier_all(); /* sync sender and receiver */
+  if (_my_pe() == 1)
+    printf("target[0] on PE %d is %ld\\n", _my_pe(), target[0]);
+}
+.Ve
+.SH SEE ALSO
+
+The following man pages also contain information on OpenSHMEM routines. See the
+specific man pages for implementation information.
+.PP
+\fIshmem_add\fP(3),
+\fIshmem_and\fP(3),
+\fIshmem_barrier\fP(3),
+\fIshmem_barrier_all\fP(3),
+\fIshmem_broadcast\fP(3),
+\fIshmem_cache\fP(3),
+\fIshmem_collect\fP(3),
+\fIshmem_cswap\fP(3),
+\fIshmem_fadd\fP(3),
+\fIshmem_fence\fP(3),
+\fIshmem_finc\fP(3),
+\fIshmem_get\fP(3),
+\fIshmem_iget\fP(3),
+\fIshmem_inc\fP(3),
+\fIshmem_iput\fP(3),
+\fIshmem_lock\fP(3),
+\fIshmem_max\fP(3),
+\fIshmem_min\fP(3),
+\fIshmem_my_pe\fP(3),
+\fIshmem_or\fP(3),
+\fIshmem_prod\fP(3),
+\fIshmem_put\fP(3),
+\fIshmem_quiet\fP(3),
+\fIshmem_short_g\fP(3),
+\fIshmem_short_p\fP(3),
+\fIshmem_sum\fP(3),
+\fIshmem_swap\fP(3),
+\fIshmem_wait\fP(3),
+\fIshmem_xor\fP(3),
+\fIshmem_pe_accessible\fP(3),
+\fIshmem_addr_accessible\fP(3),
+\fIstart_pes\fP(3),
+\fIshmalloc\fP(3C),
+\fIshpalloc\fP(3F),
+\fIMY_PE\fP(3I),
+\fINUM_PES\fP(3I)
diff --git a/oshmem/shmem/man/man3/shfree.3in b/oshmem/shmem/man/man3/shfree.3in
new file mode 100644
index 0000000000..63a8ff4e8e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shfree.3in
@@ -0,0 +1 @@
+.so man3/shmalloc.3
diff --git a/oshmem/shmem/man/man3/shmalloc.3in b/oshmem/shmem/man/man3/shmalloc.3in
new file mode 100644
index 0000000000..5255598fa9
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmalloc.3in
@@ -0,0 +1,105 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMALLOC" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmalloc\fP(3),
+\fIshfree\fP(3),
+\fIshmemalign\fP(3),
+\fIshrealloc\fP(3)
+\- Symmetric heap memory management functions.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void *shmalloc(size_t size);
+
+void shfree(void *ptr);
+
+void *shrealloc(void *ptr, size_t size);
+
+void *shmemalign(size_t alignment, size_t size);
+
+extern long malloc_error;
+.Ve
+.SH DESCRIPTION
+
+The \fBshmalloc\fP
+function returns a pointer to a block of at least size bytes
+suitably aligned for any use. This space is allocated from the symmetric heap (in contrast
+to \fImalloc\fP(3C),
+which allocates from the private heap).
+.PP
+The \fBshmemalign\fP
+function allocates a block in the symmetric heap that has a
+byte alignment specified by the alignment argument.
+.PP
+The \fBshfree\fP
+function causes the block to which ptr points to be deallocated,
+that is, made available for further allocation. If ptr is a null pointer, no action
+occurs; otherwise, if the argument does not match a pointer earlier returned by a symmetric
+heap function, or if the space has already been deallocated, malloc_error is set to indicate the
+error, and shfree returns.
+.PP
+The \fBshrealloc\fP
+function changes the size of the block to which ptr points to the
+size (in bytes) specified by size.
+.PP
+The contents of the block are unchanged up to the lesser of the new and old sizes. If the new
+size is larger, the value of the newly allocated portion of the block is indeterminate. If ptr is a
+null pointer, the shrealloc function behaves like the shmalloc function for the specified size. If
+size is 0 and ptr is not a null pointer, the block to which it points is freed. Otherwise, if ptr
+does not match a pointer earlier returned by a symmetric heap function, or if the space has
+already been deallocated, the malloc_error variable is set to indicate the error, and shrealloc
+returns a null pointer. If the space cannot be allocated, the block to which ptr points is
+unchanged.
+.PP
+The shmalloc, shfree, and shrealloc functions are provided so that multiple PEs in an
+application can allocate symmetric, remotely accessible memory blocks. These memory
+blocks can then be used with (shmem) communication routines. Each of these functions calls
+the \fIshmem_barrier_all\fP(3)
+function before returning; this ensures that all PEs
+participate in the memory allocation, and that the memory on other PEs can be used as soon
+as the local PE returns.
+.PP
+The user is responsible for calling these functions with identical argument(s) on all PEs; if
+differing size arguments are used, subsequent calls may not return the same symmetric heap
+address on all PEs.
+.PP
+.SH NOTES
+
+The total size of the symmetric heap is determined at job startup. One can adjust the size of
+the heap using the SHMEM_SYMMETRIC_HEAP_SIZE environment variable. See the
+\fIintro_shmem\fP(3)
+man page for further details.
+The shmalloc, shfree, and shrealloc functions differ from the private heap allocation functions
+in that all PEs in an application must call them (a barrier is used to ensure this).
+.PP
+.SH RETURN VALUES
+
+The \fBshmalloc\fP
+function returns a pointer to the allocated space (which should
+be identical on all PEs); otherwise, it returns a null pointer (with malloc_error set).
+The \fBshfree\fP
+function returns no value.
+The \fBshrealloc\fP
+function returns a pointer to the allocated space (which
+may have moved); otherwise, it returns a null pointer (with malloc_error set).
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fImy_pe\fP(3I),
+\fIstart_pes\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_addr_accessible.3in b/oshmem/shmem/man/man3/shmem_addr_accessible.3in
new file mode 100644
index 0000000000..bfc6e9d78e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_addr_accessible.3in
@@ -0,0 +1,56 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_ADDR\\_ACCESSIBLE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+shmem_addr_accessible \- Indicates if an address is accessible via OpenSHMEM operations
+from the specified remote PE.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+int shmem_addr_accessible(void *addr, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+LOGICAL LOG, SHMEM_ADDR_ACCESSIBLE
+INTEGER pe
+
+LOG = SHMEM_ADDR_ACCESSIBLE(addr, pe)
+.Ve
+.SH DESCRIPTION
+
+shmem_addr_accessible is a query function that indicates whether a local address is
+accessible via SHMEM operations from the specified remote PE.
+.PP
+This function verifies that the remote PE is accessible via SHMEM data transfer functions from
+the local PE, and that the specified address is in a symmetric data segment with respect to the
+remote PE.
+.PP
+.SH RETURN VALUES
+
+C: The return value is 1 if addr is a symmetric data object and accessible via SHMEM
+operations from the specified remote PE; otherwise, it is 0.
+.PP
+Fortran: The return value is \&.TRUE. if addr is a symmetric data object and accessible via
+SHMEM operations from the specified remote PE; otherwise, it is \&.FALSE..
+.PP
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_pe_accessible\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_barrier.3in b/oshmem/shmem/man/man3/shmem_barrier.3in
new file mode 100644
index 0000000000..e9e9722a04
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_barrier.3in
@@ -0,0 +1,112 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_BARRIER" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+shmem_barrier \- Performs a barrier operation on a subset of processing elements (PEs).
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_barrier(int PE_start, int logPE_stride, int PE_size,
+  long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER PE_start, logPE_stride, PE_size
+INTEGER pSync(SHMEM_BARRIER_SYNC_SIZE)
+
+CALL SHMEM_BARRIER(PE_start, logPE_stride, PE_size, pSync)
+.Ve
+.SH DESCRIPTION
+
+The shmem_barrier routine does not return until the subset of PEs specified by
+\fBPE_start\fP,
+\fBlogPE_stride\fP
+and \fBPE_size\fP,
+has entered this routine at the
+same point of the execution path.
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The arguments are as follows:
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync must be of type long and size
+_SHMEM_BARRIER_SYNC_SIZE. In Fortran, pSync must be of type integer and size
+SHMEM_BARRIER_SYNC_SIZE. If you are using Fortran, it must be a default integer type.
+Every element of this array must be initialized to 0 before any of the PEs in the active set enter
+shmem_barrier the first time.
+.PP
+The values of arguments PE_start, logPE_stride, and PE_size must be equal on all PEs in the
+active set. The same work array must be passed in pSync to all PEs in the active set.
+.PP
+shmem_barrier ensures that all previously issued local stores and previously issued remote
+memory updates done by any of the PEs in the active set (by using SHMEM calls, for
+example \fIshmem_put\fP(3))
+are complete before returning.
+.PP
+The same pSync array may be reused on consecutive calls to shmem_barrier if the same
+active PE set is used.
+.PP
+.SH NOTES
+
+The term symmetric is defined in \fIintro_shmem\fP(3)\&.
+.PP
+If the pSync array is initialized at run time, be sure to use some type of synchronization, for
+example, a call to \fIshmem_barrier_all\fP(3),
+before calling shmem_barrier for the first
+time.
+.PP
+If the active set does not change, shmem_barrier can be called repeatedly with the same
+pSync array. No additional synchronization beyond that implied by shmem_barrier itself is
+necessary in this case.
+.PP
+.SH EXAMPLES
+
+C/C++ example:
+.Vb
+shmem_barrier(PE_start, logPE_stride, size, pSync);
+.Ve
+Fortran example:
+.Vb
+INTEGER PSYNC(SHMEM_BARRIER_SYNC_SIZE)
+INTEGER PE_START, LOGPE_STRIDE, PE_SIZE, PSYNC
+DATA PSYNC /SHMEM_BARRIER_SYNC_SIZE*0/
+
+CALL SHMEM_BARRIER(PE_START, LOGPE_STRIDE, PE_SIZE, PSYNC)
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_barrier_all\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_barrier_all.3in b/oshmem/shmem/man/man3/shmem_barrier_all.3in
new file mode 100644
index 0000000000..df94380809
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_barrier_all.3in
@@ -0,0 +1,59 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_BARRIER\\_ALL" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+shmem_barrier_all \- Suspends the execution of the calling PE until all other PEs issue a call
+to this particular shmem_barrier_all() statement.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_barrier_all(void);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+CALL SHMEM_BARRIER_ALL
+.Ve
+.SH DESCRIPTION
+
+The shmem_barrier_all routine does not return until all other PEs have entered this routine
+at the same point of the execution path.
+.PP
+Prior to synchronizing with other PEs, shmem_barrier_all ensures completion of all
+previously issued local memory stores and remote memory updates issued via SHMEM
+functions such as \fIshmem_put32\fP(3)\&.
+.PP
+.SH EXAMPLES
+
+.Vb
+setup_data()
+{
+  if (_my_pe() == 0) {
+    setup();
+  }
+
+  /* All PEs wait for PE 0 to complete setup().  */
+  shmem_barrier_all();
+}
+.Ve
+.PP
+.SH SEE ALSO
+
+\fIshmem_barrier\fP(3),
+\fIstart_pes\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_broadcast32.3in b/oshmem/shmem/man/man3/shmem_broadcast32.3in
new file mode 100644
index 0000000000..abb38e7952
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_broadcast32.3in
@@ -0,0 +1,186 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_BROADCAST" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_broadcast4\fP(3),
+\fIshmem_broadcast8\fP(3),
+\fIshmem_broadcast32\fP(3),
+\fIshmem_broadcast64\fP(3)
+\- Copy a data object from a designated PE to a target
+location on all other PEs of the active set.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_broadcast32(void *target, const void *source,
+  size_t nelems, int PE_root, int PE_start, int logPE_stride,
+  int PE_size, long *pSync);
+
+void shmem_broadcast64(void *target, const void *source,
+  size_t nelems, int PE_root, int PE_start, int logPE_stride,
+  int PE_size, long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER nelems, PE_root, PE_start, logPE_stride, PE_size
+INTEGER pSync(SHMEM_BCAST_SYNC_SIZE)
+
+CALL SHMEM_BROADCAST4(target, source, nelems, PE_root,
+& PE_start, logPE_stride, PE_size, pSync)
+
+CALL SHMEM_BROADCAST8(target, source, nelems, PE_root,
+& PE_start, logPE_stride, PE_size, pSync)
+
+CALL SHMEM_BROADCAST32(target, source, nelems,
+& PE_root, PE_start, logPE_stride, PE_size, pSync)
+
+CALL SHMEM_BROADCAST64(target, source, nelems,
+& PE_root, PE_start, logPE_stride, PE_size, pSync)
+.Ve
+.SH DESCRIPTION
+
+The broadcast routines write the data at address source of the PE specified by
+\fBPE_root\fP
+to address \fBtarget\fP
+on all other PEs in the active set. The active set of
+PEs is defined by the triplet \fBPE_start\fP,
+\fBlogPE_stride\fP
+and \fBPE_size\fP\&.
+The data is not copied to the target address on the PE specified by \fBPE_root\fP\&.
+Before returning, the broadcast routines ensure that the elements of the pSync array are
+restored to their initial values.
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The arguments are as follows:
+.TP
+target
+A symmetric data object with one of the following data types:
+.RS
+.TP
+\fBshmem_broadcast8, shmem_broadcast64\fP: Any noncharacter type that
+has an element size of 64 bits. No Fortran derived types or C/C++ structures are allowed.
+.TP
+\fBshmem_broadcast32\fP: Any noncharacter type that has an element size
+of 32 bits. No Fortran derived types or C/C++ structures are allowed.
+.TP
+\fBshmem_broadcast4\fP: Any noncharacter type that has an element size
+of 32 bits.
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric data object that can be of any data type that is permissible for the
+target argument.
+.TP
+nelems
+The number of elements in source. For shmem_broadcast32 and
+shmem_broadcast4, this is the number of 32\-bit halfwords. nelems must be of type integer.
+If you are using Fortran, it must be a default integer value.
+.TP
+PE_root
+Zero\-based ordinal of the PE, with respect to the active set, from which the
+data is copied. Must be greater than or equal to 0 and less than PE_size. PE_root must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.PP
+.TP
+pSync
+A symmetric work array. In C/C++, pSync must be of type long and size
+_SHMEM_BCAST_SYNC_SIZE.
+In Fortran, pSync must be of type integer and size SHMEM_BCAST_SYNC_SIZE. Every
+element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++)
+or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
+a broadcast routine.
+.PP
+The values of arguments PE_root, PE_start, logPE_stride, and PE_size must be equal on
+all PEs in the active set. The same target and source data objects and the same pSync work
+array must be passed to all PEs in the active set.
+.PP
+Before any PE calls a broadcast routine, you must ensure that the following conditions exist
+(synchronization via a barrier or some other method is often needed to ensure this): The
+pSync array on all PEs in the active set is not still in use from a prior call to a broadcast
+routine. The target array on all PEs in the active set is ready to accept the broadcast data.
+.PP
+Upon return from a broadcast routine, the following are true for the local PE: If the current PE
+is not the root PE, the target data object is updated. The values in the pSync array are
+restored to the original values.
+.SH NOTES
+
+The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&.
+.PP
+All SHMEM broadcast routines restore pSync to its original contents. Multiple calls to SHMEM
+routines that use the same pSync array do not require that pSync be reinitialized after the
+first call.
+.PP
+You must ensure that the pSync array is not being updated by any PE in the active set
+while any of the PEs participates in processing of a SHMEM broadcast routine. Be careful to
+avoid these situations: If the pSync array is initialized at run time, some type of
+synchronization is needed to ensure that all PEs in the working set have initialized pSync
+before any of them enter a SHMEM routine called with the pSync synchronization array. A
+pSync array may be reused on a subsequent SHMEM broadcast routine only if none of the PEs
+in the active set are still processing a prior SHMEM broadcast routine call that used the same
+pSync array. In general, this can be ensured only by doing some type of synchronization.
+However, in the special case of SHMEM routines being called with the same active set, you
+can allocate two pSync arrays and alternate between them on successive calls.
+.PP
+.SH EXAMPLES
+
+In the following examples, the call to shmem_broadcast64 copies source on PE 4 to target
+on PEs 5, 6, and 7.
+.PP
+C/C++ example:
+.Vb
+for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) {
+  pSync[i] = _SHMEM_SYNC_VALUE;
+}
+shmem_barrier_all(); /* Wait for all PEs to initialize pSync */
+shmem_broadcast64(target, source, nelems, 0, 4, 0, 4, pSync);
+.Ve
+Fortran example:
+.Vb
+INTEGER PSYNC(SHMEM_BCAST_SYNC_SIZE)
+INTEGER TARGET, SOURCE, NELEMS, PE_ROOT, PE_START,
+& LOGPE_STRIDE, PE_SIZE, PSYNC
+COMMON /COM/ TARGET, SOURCE
+DATA PSYNC /SHMEM_BCAST_SYNC_SIZE*SHMEM_SYNC_VALUE/
+
+CALL SHMEM_BROADCAST64(TARGET, SOURCE, NELEMS, 0, 4, 0, 4,
+& PSYNC)
+.Ve
+.PP
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_broadcast64.3in b/oshmem/shmem/man/man3/shmem_broadcast64.3in
new file mode 100644
index 0000000000..b9255d5020
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_broadcast64.3in
@@ -0,0 +1 @@
+.so man3/shmem_broadcast32.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_char_g.3in b/oshmem/shmem/man/man3/shmem_char_g.3in
new file mode 100644
index 0000000000..722a79c640
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_char_g.3in
@@ -0,0 +1,64 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_CHAR\\_G" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_char_g\fP(3),
+\fIshmem_float_g\fP(3),
+\fIshmem_int_g\fP(3),
+\fIshmem_long_g\fP(3),
+\fIshmem_short_g\fP(3),
+\fIshmem_longlong_g\fP(3),
+\fIshmem_longdouble_g\fP(3)
+\- These routines provide a low latency mechanism to read basic types (char, short, int, float, double, long, long long, long double) from symmetric data objects on remote PEs.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+
+char shmem_char_g(char *addr, int pe);
+
+short shmem_short_g(short *addr, int pe);
+
+int shmem_int_g(int *addr, int pe);
+
+long shmem_long_g(long *addr, int pe);
+
+long long shmem_longlong_g(long long *addr, int pe);
+
+float shmem_float_g(float *addr, int pe);
+
+double shmem_double_g(double *addr, int pe);
+
+long double shmem_longdouble_g(long double *addr, int pe);
+
+.Ve
+.SH DESCRIPTION
+
+These routines provide a very low latency get capability for single elements of most basic types.
+.PP
+The arguments are as follows:
+.TP
+addr
+The remotely accessible array element or scalar data object on the remote PE from which
+the data is read.
+.TP
+pe
+The number of the remote PE.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_get\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_char_get.3in b/oshmem/shmem/man/man3/shmem_char_get.3in
new file mode 100644
index 0000000000..8091004920
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_char_get.3in
@@ -0,0 +1,207 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_GET" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_character_get\fP(3),
+\fIshmem_complex_get\fP(3),
+\fIshmem_double_get\fP(3),
+\fIshmem_float_get\fP(3),
+\fIshmem_get4\fP(3),
+\fIshmem_get8\fP(3),
+\fIshmem_get32\fP(3),
+\fIshmem_get64\fP(3),
+\fIshmem_get128\fP(3),
+\fIshmem_getmem\fP(3),
+\fIshmem_int_get\fP(3),
+\fIshmem_integer_get\fP(3),
+\fIshmem_logical_get\fP(3),
+\fIshmem_long_get\fP(3),
+\fIshmem_longdouble_get\fP(3),
+\fIshmem_longlong_get\fP(3),
+\fIshmem_real_get\fP(3),
+\fIshmem_short_get\fP(3)
+\- Transfers data from a specified processing element (PE).
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_get32(void *target, const void *source,
+  size_t len, int pe);
+
+void shmem_get64(void *target, const void *source,
+  size_t len, int pe);
+
+void shmem_get128(void *target, const void *source,
+  size_t len, int pe);
+
+void shmem_getmem(void *target, const void *source,
+  size_t len, int pe);
+
+void shmem_int_get(int *target, const int *source,
+  size_t len, int pe);
+
+void shmem_double_get(double *target, const double *source,
+  size_t len, int pe);
+
+void shmem_float_get(float *target, const float *source,
+  size_t len, int pe);
+
+void shmem_long_get(long *target, const long *source,
+  size_t len, int pe);
+
+void shmem_longdouble_get(long double *target,
+  const long double *source, size_t len, int pe);
+
+void shmem_longlong_get(long long *target,
+  const long long *source, size_t len, int pe);
+
+void shmem_short_get(short *target,
+  const short *source, size_t len, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER len, pe
+
+CALL SHMEM_CHARACTER_GET(target, source, len, pe)
+
+CALL SHMEM_COMPLEX_GET(target, source, len, pe)
+
+CALL SHMEM_DOUBLE_GET(target, source, len, pe)
+
+CALL SHMEM_GET4(target, source, len, pe)
+
+CALL SHMEM_GET8(target, source, len, pe)
+
+CALL SHMEM_GET32(target, source, len, pe)
+
+CALL SHMEM_GET64(target, source, len, pe)
+
+CALL SHMEM_GET128(target, source, len, pe)
+
+CALL SHMEM_GETMEM(target, source, len, pe)
+
+CALL SHMEM_INTEGER_GET(target, source, len, pe)
+
+CALL SHMEM_LOGICAL_GET(target, source, len, pe)
+
+CALL SHMEM_REAL_GET(target, source, len, pe)
+.Ve
+.SH DESCRIPTION
+
+The shmem_get routines transfer \fBlen\fP
+elements of the data object at address \fBsource\fP
+on the remote PE \fBpe\fP,
+to the data object at address \fBtarget\fP
+on the local PE. These routines
+return after the data has been copied to address \fBtarget\fP
+on the local PE.
+.PP
+The arguments are as follows:
+.TP
+target
+Local data object to be updated.
+.TP
+source
+Data object on the PE identified by pe that contains the data to be copied. This
+data object must be remotely accessible.
+.TP
+len
+Number of elements in the target and source arrays. len must be of type integer. If
+you are using Fortran, it must be a constant, variable, or array element of default
+integer type.
+.TP
+pe
+PE number of the remote PE. pe must be of type integer. If you are using Fortran, it
+must be a constant, variable, or array element of default integer type.
+.PP
+The target and source data objects must conform to typing constraints, which are as follows:
+.TP
+\fBshmem_getmem\fP: Fortran: Any noncharacter type. C: Any data type. len is
+scaled in bytes.
+.TP
+\fBshmem_get4, shmem_get32\fP: Any noncharacter type that has a storage size
+equal to 32 bits.
+.TP
+\fBshmem_get8, shmem_get64\fP: Any noncharacter type that has a storage size equal to
+64 bits.
+.TP
+\fBshmem_get128\fP: Any noncharacter type that has a storage size equal to 128
+bits.
+.TP
+\fBshmem_short_get\fP: Elements of type short.
+.TP
+\fBshmem_int_get\fP: Elements of type int.
+.TP
+\fBshmem_long_get\fP: Elements of type long.
+.TP
+\fBshmem_longlong_get\fP: Elements of type long long.
+.TP
+\fBshmem_float_get\fP: Elements of type float.
+.TP
+\fBshmem_double_get\fP: Elements of type double.
+.TP
+\fBshmem_longdouble_get\fP: Elements of type long double.
+.TP
+\fBSHMEM_CHARACTER_GET\fP: Elements of type character. len is the number of
+characters to transfer. The actual character lengths of the source and target variables are
+ignored.
+.TP
+\fBSHMEM_COMPLEX_GET\fP: Elements of type complex of default size.
+.TP
+\fBSHMEM_DOUBLE_GET\fP: (Fortran) Elements of type double precision.
+.TP
+\fBSHMEM_INTEGER_GET\fP: Elements of type integer.
+.TP
+\fBSHMEM_LOGICAL_GET\fP: Elements of type logical.
+.TP
+\fBSHMEM_REAL_GET\fP: Elements of type real.
+.PP
+If you are using Fortran, data types must be of default size. For example, a real variable must
+be declared as REAL, REAL*4, or REAL(KIND=4).
+.SH NOTES
+
+See \fIintro_shmem\fP(3)
+for a definition of the term remotely accessible.
+.SH EXAMPLES
+
+Consider this simple example for Fortran.
+.Vb
+PROGRAM REDUCTION
+  REAL VALUES, SUM
+  COMMON /C/ VALUES
+  REAL WORK
+
+  CALL START_PES(0) ! ALLOW ANY NUMBER OF PES
+  VALUES = MY_PE() ! INITIALIZE IT TO SOMETHING
+  CALL SHMEM_BARRIER_ALL
+  SUM = 0.0
+  DO I = 0,NUM_PES()\-1
+    CALL SHMEM_REAL_GET(WORK, VALUES, 1, I)
+    SUM = SUM + WORK
+  ENDDO
+  PRINT *, 'PE ', MY_PE(), ' COMPUTED SUM=', SUM
+  CALL SHMEM_BARRIER_ALL
+END
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_put\fP(3),
+\fIshmem_iget\fP(3),
+\fIshmem_quiet\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_char_p.3in b/oshmem/shmem/man/man3/shmem_char_p.3in
new file mode 100644
index 0000000000..3d122b2d9c
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_char_p.3in
@@ -0,0 +1,73 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_CHAR\\_P" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_char_p\fP(3),
+\fIshmem_float_p\fP(3),
+\fIshmem_int_p\fP(3),
+\fIshmem_long_p\fP(3),
+\fIshmem_short_p\fP(3),
+\fIshmem_longlong_p\fP(3),
+\fIshmem_longdouble_p\fP(3)
+\- These routines provide a low latency mechanism to write basic types (char, short, int, float, double, long, long long, long double) to symmetric data objects on remote PEs.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+
+void shmem_char_p(char *addr, char value, int pe);
+
+void shmem_short_p(short *addr, short value, int pe);
+
+void shmem_int_p(int *addr, int value, int pe);
+
+void shmem_long_p(long *addr, long value, int pe);
+
+void shmem_longlong_p(long long *addr, long long value, int pe);
+
+void shmem_float_p(float *addr, float value, int pe);
+
+void shmem_double_p(double *addr, double value, int pe);
+
+void shmem_longdouble_p(long double *addr, long double value, int pe);
+
+.Ve
+.SH DESCRIPTION
+
+These routines provide a very low latency put capability for single elements of most basic types.
+.PP
+The arguments are as follows:
+.TP
+addr
+The remotely accessible array element or scalar data object which will receive the
+data on the remote PE.
+.TP
+value
+The value to be transferred to addr on the remote PE.
+.TP
+pe
+The number of the remote PE.
+.PP
+As with \fIshmem_put\fP(3),
+these functions start the remote transfer and may return before
+the data is delivered to the remote PE. Use \fIshmem_quiet\fP(3)
+to force completion of all
+remote PUT transfers.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_put\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_char_put.3in b/oshmem/shmem/man/man3/shmem_char_put.3in
new file mode 100644
index 0000000000..2ba020522e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_char_put.3in
@@ -0,0 +1,214 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_PUT" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_character_put\fP(3),
+\fIshmem_complex_put\fP(3),
+\fIshmem_double_put\fP(3),
+\fIshmem_float_put\fP(3),
+\fIshmem_int_put\fP(3),
+\fIshmem_integer_put\fP(3),
+\fIshmem_logical_put\fP(3),
+\fIshmem_long_put\fP(3),
+\fIshmem_longdouble_put\fP(3),
+\fIshmem_longlong_put\fP(3),
+\fIshmem_put4\fP(3),
+\fIshmem_put8\fP(3),
+\fIshmem_put32\fP(3),
+\fIshmem_put64\fP(3),
+\fIshmem_put128\fP(3),
+\fIshmem_putmem\fP(3),
+\fIshmem_real_put\fP(3),
+\fIshmem_short_put\fP(3)
+\- Transfers data to a specified
+processing element (PE)
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_double_put(double *target, const double *source,
+  size_t len, int pe);
+
+void shmem_float_put(float *target, const float *source,
+  size_t len, int pe);
+
+void shmem_int_put(int *target, const int *source, size_t len,
+  int pe);
+
+void shmem_long_put(long *target, const long *source,
+  size_t len, int pe);
+
+void shmem_longdouble_put(long double *target,
+  const long double *source, size_t len, int pe);
+
+void shmem_longlong_put(long long *target,
+  const long long *source, size_t len, int pe);
+
+void shmem_put32(void *target, const void *source, size_t len,
+  int pe);
+
+void shmem_put64(void *target, const void *source, size_t len,
+  int pe);
+
+void shmem_put128(void *target, const void *source, size_t len,
+  int pe);
+
+void shmem_putmem(void *target, const void *source, size_t len,
+  int pe);
+
+void shmem_short_put(short *target, const short *source,
+  size_t len, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER len, pe
+
+CALL SHMEM_CHARACTER_PUT(target, source, len, pe)
+
+CALL SHMEM_COMPLEX_PUT(target, source, len, pe)
+
+CALL SHMEM_DOUBLE_PUT(target, source, len, pe)
+
+CALL SHMEM_INTEGER_PUT(target, source, len, pe)
+
+CALL SHMEM_LOGICAL_PUT(target, source, len, pe)
+
+CALL SHMEM_PUT(target, source, len, pe)
+
+CALL SHMEM_PUT4(target, source, len, pe)
+
+CALL SHMEM_PUT8(target, source, len, pe)
+
+CALL SHMEM_PUT32(target, source, len, pe)
+
+CALL SHMEM_PUT64(target, source, len, pe)
+
+CALL SHMEM_PUT128(target, source, len, pe)
+
+CALL SHMEM_PUTMEM(target, source, len, pe)
+
+CALL SHMEM_REAL_PUT(target, source, len, pe)
+.Ve
+.SH DESCRIPTION
+
+These routines transfer \fBlen\fP
+elements of the data object at address
+\fBsource\fP
+on the calling PE, to the data object at address \fBtarget\fP
+on the remote
+PE \fBpe\fP\&.
+These routines start the remote transfer and may return before the data is
+delivered to the remote PE.
+.PP
+The delivery of data into the data object on the destination PE from different put calls may
+occur in any order. Because of this, two successive put operations may deliver data out of
+order unless a call to \fIshmem_fence\fP(3)
+is introduced between the two calls.
+.PP
+The arguments are as follows:
+.TP
+target
+Data object to be updated on the remote PE. This data object must be remotely
+accessible.
+.TP
+source
+Data object containing the data to be copied.
+.TP
+len
+Number of elements in the target and source arrays. len must be of type integer. If
+you are using Fortran, it must be a constant, variable, or array element of default integer
+type.
+.TP
+pe
+PE number of the remote PE. pe must be of type integer. If you are using Fortran, it
+must be a constant, variable, or array element of default integer type.
+.PP
+The target and source data objects must conform to certain typing constraints, which are as
+follows:
+.TP
+\fBshmem_putmem\fP: Fortran: Any noncharacter type. C: Any data type. len is scaled in
+bytes.
+.TP
+\fBshmem_put4, shmem_put32:\fP Any noncharacter type that has a storage size
+equal to 32 bits.
+.TP
+\fBshmem_put8, shmem_put64:\fP Any noncharacter type that has a storage size
+equal to 64 bits.
+.TP
+\fBshmem_put128:\fP Any noncharacter type that has a storage size equal to 128
+bits.
+.TP
+\fBshmem_short_put:\fP Elements of type short.
+.TP
+\fBshmem_int_put:\fP Elements of type int.
+.TP
+\fBshmem_long_put:\fP Elements of type long.
+.TP
+\fBshmem_longlong_put:\fP Elements of type long long.
+.TP
+\fBshmem_float_put:\fP Elements of type float.
+.TP
+\fBshmem_double_put:\fP Elements of type double.
+.TP
+\fBshmem_longdouble_put:\fP Elements of type long double.
+.TP
+\fBSHMEM_CHARACTER_PUT:\fP Elements of type character. len is the number of
+characters to transfer. The actual character lengths of the source and target variables are
+ignored.
+.TP
+\fBSHMEM_COMPLEX_PUT:\fP Elements of type complex of default size.
+.TP
+\fBSHMEM_DOUBLE_PUT:\fP (Fortran) Elements of type double precision.
+.TP
+\fBSHMEM_INTEGER_PUT:\fP Elements of type integer.
+.TP
+\fBSHMEM_LOGICAL_PUT:\fP Elements of type logical.
+.TP
+\fBSHMEM_REAL_PUT:\fP Elements of type real.
+.PP
+If you are using Fortran, data types must be of default size. For example, a real variable must
+be declared as REAL, REAL*4, or REAL(KIND=4).
+.SH EXAMPLES
+
+The following shmem_put example is for C/C++ programs:
+.Vb
+#include <stdio.h>
+#include <mpp/shmem.h>
+
+main()
+{
+  long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+  static long target[10];
+  start_pes(2);
+
+  if (_my_pe() == 0) {
+    /* put 10 words into target on PE 1 */
+    shmem_long_put(target, source, 10, 1);
+  }
+  shmem_barrier_all();  /* sync sender and receiver */
+  if (_my_pe() == 1)
+    shmem_udcflush();  /* not required on Altix systems */
+  printf("target[0] on PE %d is %ld\\n", _my_pe(), target[0]);
+}
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_iput\fP(3),
+\fIshmem_quiet\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_clear_cache_inv.3in b/oshmem/shmem/man/man3/shmem_clear_cache_inv.3in
new file mode 100644
index 0000000000..4a6a361ef9
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_clear_cache_inv.3in
@@ -0,0 +1 @@
+.so man3/shmem_udcflush.3
diff --git a/oshmem/shmem/man/man3/shmem_clear_cache_line_inv.3in b/oshmem/shmem/man/man3/shmem_clear_cache_line_inv.3in
new file mode 100644
index 0000000000..4a6a361ef9
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_clear_cache_line_inv.3in
@@ -0,0 +1 @@
+.so man3/shmem_udcflush.3
diff --git a/oshmem/shmem/man/man3/shmem_clear_lock.3in b/oshmem/shmem/man/man3/shmem_clear_lock.3in
new file mode 100644
index 0000000000..49974c4f17
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_clear_lock.3in
@@ -0,0 +1 @@
+.so man3/shmem_set_lock.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_collect32.3in b/oshmem/shmem/man/man3/shmem_collect32.3in
new file mode 100644
index 0000000000..bce6dc5aa0
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_collect32.3in
@@ -0,0 +1,197 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_COLLECT" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_collect4\fP(3),
+\fIshmem_collect8\fP(3),
+\fIshmem_collect32\fP(3),
+\fIshmem_collect64\fP(3),
+\fIshmem_fcollect\fP(3),
+\fIshmem_fcollect4\fP(3),
+\fIshmem_fcollect8\fP(3),
+\fIshmem_fcollect32\fP(3),
+\fIshmem_fcollect64\fP(3)
+\- Concatenates blocks of data from multiple processing elements (PEs) to an array in every PE
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_collect32(void *target, const void *source,
+  size_t nelems, int PE_start, int logPE_stride, int PE_size,
+  long *pSync);
+
+void shmem_collect64(void *target, const void *source,
+  size_t nelems, int PE_start, int logPE_stride, int PE_size,
+  long *pSync);
+
+void shmem_fcollect32(void *target, const void *source,
+  size_t nelems, int PE_start, int logPE_stride, int PE_size,
+  long *pSync);
+
+void shmem_fcollect64(void *target, const void *source,
+  size_t nelems, int PE_start, int logPE_stride, int PE_size,
+  long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER nelems
+INTEGER PE_start, logPE_stride, PE_size
+INTEGER pSync(SHMEM_COLLECT_SYNC_SIZE)
+
+CALL SHMEM_COLLECT4(target, source, nelems, PE_start,
+& logPE_stride, PE_size, pSync)
+
+CALL SHMEM_COLLECT8(target, source, nelems, PE_start,
+& logPE_stride, PE_size, pSync)
+
+CALL SHMEM_FCOLLECT4(target, source, nelems, PE_start,
+& logPE_stride, PE_size, pSync)
+
+CALL SHMEM_FCOLLECT8(target, source, nelems, PE_start,
+& logPE_stride, PE_size, pSync)
+.Ve
+.SH DESCRIPTION
+
+The shared memory (SHMEM) collect and fcollect routines concatenate nelems 64\-bit or 32\-bit
+data items from the source array into the target array, over the set of PEs defined by
+PE_start, logPE_stride, and PE_size, in processor number order. The resultant target array
+contains the contribution from PE PE_start first, then the contribution from PE PE_start +
+PE_stride second, and so on. The collected result is written to the target array for all PEs in
+the active set.
+.PP
+The fcollect routines require that nelems be the same value in all participating PEs, while the
+collect routines allow nelems to vary from PE to PE.
+.PP
+The resulting target array is as follows:
+.Vb
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+   source(1..nelems)
+       from PE (PE_start + 0 * (2**logPE_stride))
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+   source(1..nelems)
+       from PE (PE_start + 1 * (2**logPE_stride))
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+   ...
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+   source(1..nelems) from
+       PE (PE_start + (PE_size \- 1) * (2**logPE_stride))
+\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-
+.Ve
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in
+the active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The arguments are as follows:
+.TP
+target
+A symmetric array. The target argument must be large enough to accept the concatenation of the source arrays on all PEs. The data types are
+as follows:
+.RS
+.TP
+[shmem_collect8, shmem_collect64, shmem_fcollect8, and
+shmem_fcollect64] any data type with an element size of 64 bits. Fortran derived types,
+Fortran character type, and C/C++ structures are not permitted.
+.TP
+[shmem_collect4, shmem_collect32, shmem_fcollect4, and
+shmem_fcollect32] any data type with an element size of 32 bits. Fortran derived types,
+Fortran character type, and C/C++ structures are not permitted.
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric data object that can be of any type permissible for the target
+argument.
+.TP
+nelems
+The number of elements in the source array. nelems must be of type integer. If
+you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync must be of type long and size
+_SHMEM_COLLECT_SYNC_SIZE. In Fortran, pSync must be of type integer and size
+SHMEM_COLLECT_SYNC_SIZE. If you are using Fortran, it must be a default integer value.
+Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE in
+C/C++ or SHMEM_SYNC_VALUE in Fortran before any of the PEs in the active set enter
+shmem_collect or shmem_fcollect.
+.PP
+The values of arguments PE_start, logPE_stride, and PE_size must be equal on all PEs in
+the active set. The same target and source arrays and the same pSync work array must be
+passed to all PEs in the active set.
+.PP
+Upon return from a collective routine, the following are true for the local PE: The target array
+is updated. The values in the pSync array are restored to the original values.
+.SH NOTES
+
+The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&.
+All SHMEM collective routines reset the values in pSync before they return, so a particular
+pSync buffer need only be initialized the first time it is used.
+.PP
+You must ensure that the pSync array is not being updated on any PE in the active set while
+any of the PEs participate in processing of a SHMEM collective routine. Be careful to
+avoid these situations: If the pSync array is initialized at run time, some type of
+synchronization is needed to ensure that all PEs in the working set have initialized
+pSync before any of them enter a SHMEM routine called with the pSync synchronization array.
+A pSync array can be reused on a subsequent SHMEM collective routine only if none
+of the PEs in the active set are still processing a prior SHMEM collective routine call that used
+the same pSync array. In general, this may be ensured only by doing some type of
+synchronization. However, in the special case of SHMEM routines being called with the same
+active set, you can allocate two pSync arrays and alternate between them on
+successive calls.
+.PP
+The collective routines operate on active PE sets that have a non\-power\-of\-two PE_size
+with some performance degradation. They operate with no performance degradation
+when nelems is a non\-power\-of\-two value.
+.SH EXAMPLES
+
+C/C++:
+.Vb
+for (i=0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
+  pSync[i] = _SHMEM_SYNC_VALUE;
+}
+shmem_barrier_all(); /* Wait for all PEs to initialize pSync */
+shmem_collect32(target, source, 64, pe_start, logPE_stride,
+   pe_size, pSync);
+.Ve
+Fortran:
+.Vb
+INTEGER PSYNC(SHMEM_COLLECT_SYNC_SIZE)
+DATA PSYNC /SHMEM_COLLECT_SYNC_SIZE*SHMEM_SYNC_VALUE/
+
+CALL SHMEM_COLLECT4(TARGET, SOURCE, 64, PE_START,
+& LOGPE_STRIDE, PE_SIZE, PSYNC)
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_collect64.3in b/oshmem/shmem/man/man3/shmem_collect64.3in
new file mode 100644
index 0000000000..17caf5bf98
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_collect64.3in
@@ -0,0 +1 @@
+.so man3/shmem_collect32.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_complexd_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_complexd_prod_to_all.3in
new file mode 100644
index 0000000000..39b196d082
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_complexd_prod_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_prod_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_complexd_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_complexd_sum_to_all.3in
new file mode 100644
index 0000000000..f75a494841
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_complexd_sum_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_sum_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_complexf_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_complexf_prod_to_all.3in
new file mode 100644
index 0000000000..39b196d082
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_complexf_prod_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_prod_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_complexf_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_complexf_sum_to_all.3in
new file mode 100644
index 0000000000..f75a494841
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_complexf_sum_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_sum_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_double_g.3in b/oshmem/shmem/man/man3/shmem_double_g.3in
new file mode 100644
index 0000000000..d2bbc4ad8d
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_g.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_g.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_double_get.3in b/oshmem/shmem/man/man3/shmem_double_get.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_get.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_double_iget.3in b/oshmem/shmem/man/man3/shmem_double_iget.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_iget.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_double_iput.3in b/oshmem/shmem/man/man3/shmem_double_iput.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_iput.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_double_max_to_all.3in b/oshmem/shmem/man/man3/shmem_double_max_to_all.3in
new file mode 100644
index 0000000000..e4ad3901e8
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_max_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_max_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_double_min_to_all.3in b/oshmem/shmem/man/man3/shmem_double_min_to_all.3in
new file mode 100644
index 0000000000..d688221529
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_min_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_min_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_double_p.3in b/oshmem/shmem/man/man3/shmem_double_p.3in
new file mode 100644
index 0000000000..c08d60a543
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_p.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_p.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_double_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_double_prod_to_all.3in
new file mode 100644
index 0000000000..39b196d082
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_prod_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_prod_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_double_put.3in b/oshmem/shmem/man/man3/shmem_double_put.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_put.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_double_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_double_sum_to_all.3in
new file mode 100644
index 0000000000..f75a494841
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_sum_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_sum_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_double_swap.3in b/oshmem/shmem/man/man3/shmem_double_swap.3in
new file mode 100644
index 0000000000..a038a3f31e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_double_swap.3in
@@ -0,0 +1 @@
+.so man3/shmem_swap.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_fcollect32.3in b/oshmem/shmem/man/man3/shmem_fcollect32.3in
new file mode 100644
index 0000000000..17caf5bf98
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_fcollect32.3in
@@ -0,0 +1 @@
+.so man3/shmem_collect32.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_fcollect64.3in b/oshmem/shmem/man/man3/shmem_fcollect64.3in
new file mode 100644
index 0000000000..17caf5bf98
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_fcollect64.3in
@@ -0,0 +1 @@
+.so man3/shmem_collect32.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_fence.3in b/oshmem/shmem/man/man3/shmem_fence.3in
new file mode 100644
index 0000000000..12e97a55dc
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_fence.3in
@@ -0,0 +1,54 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_FENCE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+shmem_fence \- Provides a separate ordering on the sequence of puts issued by this PE to each destination
+PE.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_fence(void);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+CALL SHMEM_FENCE
+.Ve
+.SH DESCRIPTION
+
+The \fBshmem_fence()\fP
+routine provides an ordering on the put operations issued by the calling
+PE prior to the call to \fBshmem_fence()\fP
+relative to the put operations issued by the
+calling PE following the call to \fBshmem_fence()\fP\&.
+It guarantees that all such prior put operations
+issued to a particular destination PE are fully written to the symmetric memory of
+that destination PE, before any such following put operations to that same destination PE
+are written to the symmetric memory of that destination PE.
+Note that the ordering is provided separately on the sequences of puts from the calling PE to
+each distinct destination PE. The \fBshmem_quiet()\fP
+routine should be used instead if ordering
+of puts is required when multiple destination PEs are involved.
+.SH NOTES
+
+The shmem_quiet function should be called if ordering of puts is desired when multiple remote
+PEs are involved.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_float_g.3in b/oshmem/shmem/man/man3/shmem_float_g.3in
new file mode 100644
index 0000000000..d2bbc4ad8d
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_g.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_g.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_float_get.3in b/oshmem/shmem/man/man3/shmem_float_get.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_get.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_float_iget.3in b/oshmem/shmem/man/man3/shmem_float_iget.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_iget.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_float_iput.3in b/oshmem/shmem/man/man3/shmem_float_iput.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_iput.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_float_max_to_all.3in b/oshmem/shmem/man/man3/shmem_float_max_to_all.3in
new file mode 100644
index 0000000000..e4ad3901e8
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_max_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_max_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_float_min_to_all.3in b/oshmem/shmem/man/man3/shmem_float_min_to_all.3in
new file mode 100644
index 0000000000..d688221529
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_min_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_min_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_float_p.3in b/oshmem/shmem/man/man3/shmem_float_p.3in
new file mode 100644
index 0000000000..c08d60a543
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_p.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_p.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_float_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_float_prod_to_all.3in
new file mode 100644
index 0000000000..39b196d082
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_prod_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_prod_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_float_put.3in b/oshmem/shmem/man/man3/shmem_float_put.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_put.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_float_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_float_sum_to_all.3in
new file mode 100644
index 0000000000..f75a494841
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_sum_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_sum_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_float_swap.3in b/oshmem/shmem/man/man3/shmem_float_swap.3in
new file mode 100644
index 0000000000..a038a3f31e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_float_swap.3in
@@ -0,0 +1 @@
+.so man3/shmem_swap.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_get128.3in b/oshmem/shmem/man/man3/shmem_get128.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_get128.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_get32.3in b/oshmem/shmem/man/man3/shmem_get32.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_get32.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_get64.3in b/oshmem/shmem/man/man3/shmem_get64.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_get64.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_getmem.3in b/oshmem/shmem/man/man3/shmem_getmem.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_getmem.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_iget128.3in b/oshmem/shmem/man/man3/shmem_iget128.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_iget128.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_iget32.3in b/oshmem/shmem/man/man3/shmem_iget32.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_iget32.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_iget64.3in b/oshmem/shmem/man/man3/shmem_iget64.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_iget64.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_add.3in b/oshmem/shmem/man/man3/shmem_int_add.3in
new file mode 100644
index 0000000000..ff4728492a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_add.3in
@@ -0,0 +1,76 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_ADD" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_int_add\fP(3),
+\fIshmem_int4_add\fP(3),
+\fIshmem_int8_add\fP(3),
+\fIshmem_long_add\fP(3),
+\fIshmem_longlong_add\fP(3)
+\- Performs an atomic add
+operation.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_int_add(int *target, int value, int pe);
+void shmem_long_add(long *target, long value, int pe);
+void shmem_longlong_add(long long *target, long long value,
+  int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pe
+
+CALL SHMEM_INT4_ADD(target, value, pe)
+CALL SHMEM_INT8_ADD(target, value, pe)
+.Ve
+.SH DESCRIPTION
+
+The atomic add routines add \fBvalue\fP
+to the data at address \fBtarget\fP
+on PE
+\fBpe\fP\&.
+The operation completes without the possibility of another process updating
+target between the time of the fetch and the update.
+.PP
+The arguments are as follows:
+.TP
+target
+The remotely accessible integer data object to be updated on the remote PE. If
+you are using C/C++, the type of target should match that implied in the SYNOPSIS section. If
+you are using the Fortran compiler, it must be of type integer with an element size of 4 bytes
+for SHMEM_INT4_ADD and 8 bytes for SHMEM_INT8_ADD.
+.TP
+value
+The value to be atomically added to target. If you are using C/C++, the type of
+value should match that implied in the SYNOPSIS section. If you are using Fortran, it must be
+of type integer with the same element size as target.
+.TP
+pe
+An integer that indicates the PE number upon which target is to be updated. If you
+are using Fortran, it must be a default integer value.
+.PP
+.SH NOTES
+
+The term remotely accessible is defined in \fIintro_shmem\fP(3)\&.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_cache\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_int_and_to_all.3in b/oshmem/shmem/man/man3/shmem_int_and_to_all.3in
new file mode 100644
index 0000000000..5b5103cd48
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_and_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_and_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_cswap.3in b/oshmem/shmem/man/man3/shmem_int_cswap.3in
new file mode 100644
index 0000000000..568e66cc2a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_cswap.3in
@@ -0,0 +1,127 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_CSWAP" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_int_cswap\fP(3),
+\fIshmem_int4_cswap\fP(3),
+\fIshmem_int8_cswap\fP(3),
+\fIshmem_long_cswap\fP(3),
+\fIshmem_longlong_cswap\fP(3)
+\- Performs an atomic conditional swap to a remote data object
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+int shmem_int_cswap(int *target, int cond, int value, int pe);
+
+long shmem_long_cswap(long *target, long cond, long value,
+  int pe);
+
+long long shmem_longlong_cswap(long long *target,
+  long long cond, long long value, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pe
+
+INTEGER(KIND=4) SHMEM_INT4_CSWAP
+ires = SHMEM_INT4_CSWAP(target, cond, value, pe)
+
+INTEGER(KIND=8) SHMEM_INT8_CSWAP
+ires = SHMEM_INT8_CSWAP(target, cond, value, pe)
+.Ve
+.SH DESCRIPTION
+
+The conditional swap routines conditionally update a target data object on an arbitrary
+processing element (PE) and return the prior contents of the data object in one atomic
+operation.
+.PP
+The arguments are as follows:
+.TP
+target
+The remotely accessible integer data object to be updated on the remote PE. If
+you are using C/C++, the data type of target should match that implied in the SYNOPSIS
+section. If you are using Fortran, it must be of the following type:
+.RS
+.TP
+\fBSHMEM_INT4_CSWAP\fP: 4\-byte integer
+.TP
+\fBSHMEM_INT8_CSWAP\fP: 8\-byte integer
+.RE
+.RS
+.PP
+.RE
+.TP
+cond
+cond is compared to the remote target value. If cond and the remote target are
+equal, then value is swapped into the remote target. Otherwise, the remote target is
+unchanged. In either case, the old value of the remote target is returned as the function return
+value. cond must be of the same data type as target.
+.TP
+value
+The value to be atomically written to the remote PE. value must be the same data
+type as target.
+.TP
+pe
+An integer that indicates the PE number upon which target is to be updated. If you
+are using Fortran, it must be a default integer value.
+.PP
+.SH NOTES
+
+The term remotely accessible is defined in \fIintro_shmem\fP(3)\&.
+.SH RETURN VALUES
+
+The contents that had been in the target data object on the remote PE prior to the conditional
+swap.
+.SH EXAMPLES
+
+The following call ensures that the first PE to execute the conditional swap will successfully
+write its PE number to race_winner on PE 0.
+.Vb
+main()
+{
+  static int race_winner = \-1;
+  int oldval;
+
+  start_pes(2);
+  oldval = shmem_int_cswap(&race_winner, \-1, _my_pe(), 0);
+  if (oldval == \-1)
+    printf("pe %d was first\\n",_my_pe());
+}
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_cache\fP(3),
+\fIshmem_swap\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_int_fadd.3in b/oshmem/shmem/man/man3/shmem_int_fadd.3in
new file mode 100644
index 0000000000..a1eb726924
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_fadd.3in
@@ -0,0 +1,79 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_FADD" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_int4_fadd\fP(3),
+\fIshmem_int8_fadd\fP(3),
+\fIshmem_int_fadd\fP(3),
+\fIshmem_long_fadd\fP(3),
+\fIshmem_longlong_fadd\fP(3)
+\- Performs an atomic fetch\-and\-add operation on a remote data object
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+int shmem_int_fadd(int *target, int value, int pe);
+
+long shmem_long_fadd(long *target, long value, int pe);
+
+long long shmem_longlong_fadd(long long *target, long long value,
+  int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pe
+
+INTEGER(KIND=4) SHMEM_INT4_FADD, ires, target, value
+ires = SHMEM_INT4_FADD(target, value, pe)
+
+INTEGER(KIND=8) SHMEM_INT8_FADD, ires, target, value
+ires = SHMEM_INT8_FADD(target, value, pe)
+.Ve
+.SH DESCRIPTION
+
+shmem_fadd functions perform an atomic fetch\-and\-add operation. An atomic
+fetch\-and\-add operation fetches the old target and adds value to target without the
+possibility of another process updating target between the time of the fetch and the update.
+These routines add value to target on Processing Element (PE) pe and return the previous
+contents of target as an atomic operation.
+.PP
+The arguments are as follows:
+.TP
+target
+The remotely accessible integer data object to be updated on the remote PE. The
+type of target should match that implied in the SYNOPSIS section.
+.TP
+value
+The value to be atomically added to target. The type of value should match that
+implied in the SYNOPSIS section.
+.TP
+pe
+An integer that indicates the PE number on which target is to be updated. If you are
+using Fortran, it must be a default integer value.
+.PP
+.SH NOTES
+
+The term remotely accessible is defined in \fIintro_shmem\fP(3)\&.
+.SH RETURN VALUES
+
+The contents that had been at the target address on the remote PE prior to the atomic addition
+operation.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_int_finc.3in b/oshmem/shmem/man/man3/shmem_int_finc.3in
new file mode 100644
index 0000000000..0f1808693e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_finc.3in
@@ -0,0 +1,76 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_FINC" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_int4_finc\fP(3),
+\fIshmem_int8_finc\fP(3),
+\fIshmem_int_finc\fP(3),
+\fIshmem_long_finc\fP(3),
+\fIshmem_longlong_finc\fP(3)
+\- Performs an atomic fetch\-and\-increment operation on a remote data object
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+int shmem_int_finc(int *target, int pe);
+
+long shmem_long_finc(long *target, int pe);
+
+long long shmem_longlong_finc(long long *target, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pe
+INTEGER(KIND=4) SHMEM_INT4_FINC, target4
+INTEGER(KIND=8) SHMEM_INT8_FINC, target8
+
+ires4 = SHMEM_INT4_FINC(target4, pe)
+
+ires8 = SHMEM_INT8_FINC(target8, pe)
+.Ve
+.SH DESCRIPTION
+
+The fetch and increment routines retrieve the value at address \fBtarget\fP
+on PE
+\fBpe\fP,
+and update \fBtarget\fP
+with the result of incrementing the retrieved value by
+one. The operation must be completed without the possibility of another process updating
+\fBtarget\fP
+between the time of the fetch and the update.
+.PP
+The arguments are as follows:
+.TP
+target
+The remotely accessible integer data object to be updated on the remote PE. The
+type of target should match that implied in the SYNOPSIS section.
+.TP
+pe
+An integer that indicates the PE number upon which target is to be updated. If you
+are using Fortran, it must be a default integer value.
+.PP
+.SH NOTES
+
+The term remotely accessible is defined in \fIintro_shmem\fP(3)\&.
+.SH RETURN VALUES
+
+The contents that had been at the target address on the remote PE prior to the increment.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_int_g.3in b/oshmem/shmem/man/man3/shmem_int_g.3in
new file mode 100644
index 0000000000..d2bbc4ad8d
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_g.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_g.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_get.3in b/oshmem/shmem/man/man3/shmem_int_get.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_get.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_iget.3in b/oshmem/shmem/man/man3/shmem_int_iget.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_iget.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_inc.3in b/oshmem/shmem/man/man3/shmem_int_inc.3in
new file mode 100644
index 0000000000..0a879d766d
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_inc.3in
@@ -0,0 +1,73 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_INC" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_int4_inc\fP(3),
+\fIshmem_int8_inc\fP(3),
+\fIshmem_int_inc\fP(3),
+\fIshmem_long_inc\fP(3),
+\fIshmem_longlong_inc\fP(3)
+\- These routines perform an atomic increment operation on a remote data object.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_int_inc(int *target, int pe);
+
+void shmem_long_inc(long *target, int pe);
+
+void shmem_longlong_inc(long long *target, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pe
+INTEGER(KIND=4) target4
+INTEGER(KIND=8) target8
+
+CALL SHMEM_INT4_INC(target4, pe)
+
+CALL SHMEM_INT8_INC(target8, pe)
+.Ve
+.SH DESCRIPTION
+
+The atomic increment routines replace the value of \fBtarget\fP
+with its value incremented by
+one. The operation must be completed without the possibility of another process updating
+\fBtarget\fP
+between the time of the fetch and the update.
+.PP
+The arguments are as follows:
+.TP
+target
+The remotely accessible integer data object to be updated on the remote PE. The
+type of target should match that implied in the SYNOPSIS section.
+.TP
+pe
+An integer that indicates the PE number upon which target is to be updated. If you
+are using Fortran, it must be a default integer value.
+.PP
+.SH NOTES
+
+The term remotely accessible is defined in \fIintro_shmem\fP(3)\&.
+.SH RETURN VALUES
+
+None.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_int_iput.3in b/oshmem/shmem/man/man3/shmem_int_iput.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_iput.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_max_to_all.3in b/oshmem/shmem/man/man3/shmem_int_max_to_all.3in
new file mode 100644
index 0000000000..e4ad3901e8
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_max_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_max_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_min_to_all.3in b/oshmem/shmem/man/man3/shmem_int_min_to_all.3in
new file mode 100644
index 0000000000..d688221529
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_min_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_min_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_int_or_to_all.3in b/oshmem/shmem/man/man3/shmem_int_or_to_all.3in
new file mode 100644
index 0000000000..c78fc06052
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_or_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_or_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_p.3in b/oshmem/shmem/man/man3/shmem_int_p.3in
new file mode 100644
index 0000000000..c08d60a543
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_p.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_p.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_int_prod_to_all.3in
new file mode 100644
index 0000000000..39b196d082
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_prod_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_prod_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_int_put.3in b/oshmem/shmem/man/man3/shmem_int_put.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_put.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_int_sum_to_all.3in
new file mode 100644
index 0000000000..f75a494841
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_sum_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_sum_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_int_swap.3in b/oshmem/shmem/man/man3/shmem_int_swap.3in
new file mode 100644
index 0000000000..a038a3f31e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_swap.3in
@@ -0,0 +1 @@
+.so man3/shmem_swap.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_int_wait.3in b/oshmem/shmem/man/man3/shmem_int_wait.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_wait.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmem_int_wait_until.3in b/oshmem/shmem/man/man3/shmem_int_wait_until.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_wait_until.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmem_int_xor_to_all.3in b/oshmem/shmem/man/man3/shmem_int_xor_to_all.3in
new file mode 100644
index 0000000000..cd2d696b96
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_int_xor_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_xor_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_iput128.3in b/oshmem/shmem/man/man3/shmem_iput128.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_iput128.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_iput32.3in b/oshmem/shmem/man/man3/shmem_iput32.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_iput32.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_iput64.3in b/oshmem/shmem/man/man3/shmem_iput64.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_iput64.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_add.3in b/oshmem/shmem/man/man3/shmem_long_add.3in
new file mode 100644
index 0000000000..b356added7
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_add.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_add.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_and_to_all.3in b/oshmem/shmem/man/man3/shmem_long_and_to_all.3in
new file mode 100644
index 0000000000..5b5103cd48
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_and_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_and_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_cswap.3in b/oshmem/shmem/man/man3/shmem_long_cswap.3in
new file mode 100644
index 0000000000..b89ae230be
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_cswap.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_cswap.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_fadd.3in b/oshmem/shmem/man/man3/shmem_long_fadd.3in
new file mode 100644
index 0000000000..bebae84923
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_fadd.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_fadd.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_finc.3in b/oshmem/shmem/man/man3/shmem_long_finc.3in
new file mode 100644
index 0000000000..2c75c2ab68
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_finc.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_finc.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_g.3in b/oshmem/shmem/man/man3/shmem_long_g.3in
new file mode 100644
index 0000000000..d2bbc4ad8d
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_g.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_g.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_get.3in b/oshmem/shmem/man/man3/shmem_long_get.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_get.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_iget.3in b/oshmem/shmem/man/man3/shmem_long_iget.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_iget.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_inc.3in b/oshmem/shmem/man/man3/shmem_long_inc.3in
new file mode 100644
index 0000000000..aa2f265e1b
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_inc.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_inc.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_iput.3in b/oshmem/shmem/man/man3/shmem_long_iput.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_iput.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_max_to_all.3in b/oshmem/shmem/man/man3/shmem_long_max_to_all.3in
new file mode 100644
index 0000000000..e4ad3901e8
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_max_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_max_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_min_to_all.3in b/oshmem/shmem/man/man3/shmem_long_min_to_all.3in
new file mode 100644
index 0000000000..d688221529
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_min_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_min_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_long_or_to_all.3in b/oshmem/shmem/man/man3/shmem_long_or_to_all.3in
new file mode 100644
index 0000000000..c78fc06052
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_or_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_or_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_p.3in b/oshmem/shmem/man/man3/shmem_long_p.3in
new file mode 100644
index 0000000000..c08d60a543
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_p.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_p.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_long_prod_to_all.3in
new file mode 100644
index 0000000000..39b196d082
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_prod_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_prod_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_long_put.3in b/oshmem/shmem/man/man3/shmem_long_put.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_put.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_long_sum_to_all.3in
new file mode 100644
index 0000000000..f75a494841
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_sum_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_sum_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_long_swap.3in b/oshmem/shmem/man/man3/shmem_long_swap.3in
new file mode 100644
index 0000000000..a038a3f31e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_swap.3in
@@ -0,0 +1 @@
+.so man3/shmem_swap.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_long_wait.3in b/oshmem/shmem/man/man3/shmem_long_wait.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_wait.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmem_long_wait_until.3in b/oshmem/shmem/man/man3/shmem_long_wait_until.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_wait_until.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmem_long_xor_to_all.3in b/oshmem/shmem/man/man3/shmem_long_xor_to_all.3in
new file mode 100644
index 0000000000..cd2d696b96
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_long_xor_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_xor_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_g.3in b/oshmem/shmem/man/man3/shmem_longdouble_g.3in
new file mode 100644
index 0000000000..d2bbc4ad8d
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_g.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_g.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_get.3in b/oshmem/shmem/man/man3/shmem_longdouble_get.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_get.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_iget.3in b/oshmem/shmem/man/man3/shmem_longdouble_iget.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_iget.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_iput.3in b/oshmem/shmem/man/man3/shmem_longdouble_iput.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_iput.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_max_to_all.3in b/oshmem/shmem/man/man3/shmem_longdouble_max_to_all.3in
new file mode 100644
index 0000000000..e4ad3901e8
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_max_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_max_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_min_to_all.3in b/oshmem/shmem/man/man3/shmem_longdouble_min_to_all.3in
new file mode 100644
index 0000000000..d688221529
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_min_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_min_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_p.3in b/oshmem/shmem/man/man3/shmem_longdouble_p.3in
new file mode 100644
index 0000000000..c08d60a543
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_p.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_p.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_longdouble_prod_to_all.3in
new file mode 100644
index 0000000000..39b196d082
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_prod_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_prod_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_put.3in b/oshmem/shmem/man/man3/shmem_longdouble_put.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_put.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longdouble_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_longdouble_sum_to_all.3in
new file mode 100644
index 0000000000..f75a494841
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longdouble_sum_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_sum_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_longlong_add.3in b/oshmem/shmem/man/man3/shmem_longlong_add.3in
new file mode 100644
index 0000000000..b356added7
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_add.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_add.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_and_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_and_to_all.3in
new file mode 100644
index 0000000000..5b5103cd48
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_and_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_and_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_cswap.3in b/oshmem/shmem/man/man3/shmem_longlong_cswap.3in
new file mode 100644
index 0000000000..b89ae230be
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_cswap.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_cswap.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_fadd.3in b/oshmem/shmem/man/man3/shmem_longlong_fadd.3in
new file mode 100644
index 0000000000..bebae84923
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_fadd.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_fadd.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_finc.3in b/oshmem/shmem/man/man3/shmem_longlong_finc.3in
new file mode 100644
index 0000000000..2c75c2ab68
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_finc.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_finc.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_g.3in b/oshmem/shmem/man/man3/shmem_longlong_g.3in
new file mode 100644
index 0000000000..d2bbc4ad8d
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_g.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_g.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_get.3in b/oshmem/shmem/man/man3/shmem_longlong_get.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_get.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_iget.3in b/oshmem/shmem/man/man3/shmem_longlong_iget.3in
new file mode 100644
index 0000000000..48dee9db50
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_iget.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iget.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_inc.3in b/oshmem/shmem/man/man3/shmem_longlong_inc.3in
new file mode 100644
index 0000000000..aa2f265e1b
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_inc.3in
@@ -0,0 +1 @@
+.so man3/shmem_int_inc.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_iput.3in b/oshmem/shmem/man/man3/shmem_longlong_iput.3in
new file mode 100644
index 0000000000..c7b4a30e1a
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_iput.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_iput.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_max_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_max_to_all.3in
new file mode 100644
index 0000000000..e4ad3901e8
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_max_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_max_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_min_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_min_to_all.3in
new file mode 100644
index 0000000000..d688221529
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_min_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_min_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_longlong_or_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_or_to_all.3in
new file mode 100644
index 0000000000..c78fc06052
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_or_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_or_to_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_p.3in b/oshmem/shmem/man/man3/shmem_longlong_p.3in
new file mode 100644
index 0000000000..c08d60a543
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_p.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_p.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_prod_to_all.3in
new file mode 100644
index 0000000000..39b196d082
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_prod_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_prod_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_longlong_put.3in b/oshmem/shmem/man/man3/shmem_longlong_put.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_put.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_sum_to_all.3in
new file mode 100644
index 0000000000..f75a494841
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_sum_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_sum_to_all.3
diff --git a/oshmem/shmem/man/man3/shmem_longlong_swap.3in b/oshmem/shmem/man/man3/shmem_longlong_swap.3in
new file mode 100644
index 0000000000..a038a3f31e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_swap.3in
@@ -0,0 +1 @@
+.so man3/shmem_swap.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_longlong_wait.3in b/oshmem/shmem/man/man3/shmem_longlong_wait.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_wait.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmem_longlong_wait_until.3in b/oshmem/shmem/man/man3/shmem_longlong_wait_until.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_wait_until.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmem_longlong_xor_to_all.3in b/oshmem/shmem/man/man3/shmem_longlong_xor_to_all.3in
new file mode 100644
index 0000000000..cd2d696b96
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_longlong_xor_to_all.3in
@@ -0,0 +1 @@
+.so man3/shmem_short_xor_all.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_pe_accessible.3in b/oshmem/shmem/man/man3/shmem_pe_accessible.3in
new file mode 100644
index 0000000000..66b38b9224
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_pe_accessible.3in
@@ -0,0 +1,54 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_PE\\_ACCESSIBLE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+shmem_pe_accessible \- Determines whether a processing element (PE) is accessible via
+SHMEM data transfer operations.
+.SH SYNOPSIS
+
+C:
+.Vb
+#include <mpp/shmem.h>
+
+int shmem_pe_accessible(int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+LOGICAL LOG, SHMEM_PE_ACCESSIBLE
+INTEGER pe
+
+LOG = SHMEM_PE_ACCESSIBLE(pe)
+.Ve
+.SH DESCRIPTION
+
+shmem_pe_accessible returns a value that indicates whether the calling PE is able to perform
+OpenSHMEM communication operations with the remote PE.
+.SH RETURN VALUES
+
+.TP
+C/C++
+The return value is 1 if the specified PE is a valid remote PE for SHMEM functions;
+otherwise, it is 0.
+.TP
+Fortran
+The return value is \&.TRUE. if the specified PE is a valid remote PE for SHMEM
+functions; otherwise, it is \&.FALSE..
+.PP
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_addr_accessible\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_ptr.3in b/oshmem/shmem/man/man3/shmem_ptr.3in
new file mode 100644
index 0000000000..bfc1a0170c
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_ptr.3in
@@ -0,0 +1,129 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_PTR" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_ptr\fP(3)
+\- Returns a pointer to a data object on a specified processing element
+(PE).
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void *shmem_ptr(void *target, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+POINTER (PTR, POINTEE)
+INTEGER pe
+
+PTR = SHMEM_PTR(target, pe)
+.Ve
+.SH DESCRIPTION
+
+The shmem_ptr routine returns an address that can be used to directly reference
+\fBtarget\fP
+on the remote PE \fBpe\fP\&.
+With this address we can perform ordinary loads
+and stores to the remote address.
+.PP
+When a sequence of loads (gets) and stores (puts) to a data object on a remote PE does not
+match the access pattern provided in a SHMEM data transfer routine like
+\fIshmem_put32\fP(3)
+or \fIshmem_real_iget\fP(3),
+the shmem_ptr function can
+provide an efficient means to accomplish the communication.
+.PP
+The arguments are as follows:
+.TP
+target
+The symmetric data object to be referenced.
+.TP
+pe
+An integer that indicates the PE number on which target is to be accessed. If you
+are using Fortran, it must be a default integer value.
+.PP
+.SH EXAMPLES
+
+This Fortran program calls shmem_ptr and then PE 0 writes to the BIGD array on PE 1:
+.Vb
+PROGRAM REMOTEWRITE
+  INCLUDE 'mpp/shmem.fh'
+
+  INTEGER BIGD(100)
+  SAVE BIGD
+  INTEGER POINTEE(*)
+
+  POINTER (PTR,POINTEE)
+  CALL START_PES(0)
+  IF (MY_PE() .EQ. 0) THEN
+                             ! initialize PE 1's BIGD array
+    PTR = SHMEM_PTR(BIGD, 1) ! get address of PE 1's BIGD
+                             ! array
+    DO I=1,100
+      POINTEE(I) = I
+    ENDDO
+  ENDIF
+  CALL SHMEM_BARRIER_ALL
+  IF (MY_PE() .EQ. 1) THEN
+    PRINT *, 'BIGD on PE 1 is: '
+    PRINT *, BIGD
+  ENDIF
+END
+.Ve
+This is the equivalent program written in C:
+.Vb
+#include <mpp/shmem.h>
+main()
+{
+  static int bigd[100];
+  int *ptr;
+  int i;
+
+  start_pes(0);
+  if (_my_pe() == 0) {
+    /* initialize PE 1's bigd array */
+    ptr = shmem_ptr(bigd, 1);
+    for (i=0; i<100; i++)
+      *ptr++ = i+1;
+  }
+  shmem_barrier_all();
+  if (_my_pe() == 1) {
+    printf("bigd on PE 1 is:\\n");
+    for (i=0; i<100; i++)
+      printf(" %d\\n",bigd[i]);
+    printf("\\n");
+  }
+}
+.Ve
+.SH NOTES
+
+The shmem_ptr function is available only on systems where ordinary memory loads and
+stores are used to implement SHMEM put and get operations.
+.PP
+.SH RETURN VALUES
+
+shmem_ptr returns a pointer to the data object on the specified remote PE. If target is not
+remotely accessible, a NULL pointer is returned.
+.PP
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_put\fP(3),
+\fIshmem_get\fP(3)
+.PP
diff --git a/oshmem/shmem/man/man3/shmem_put128.3in b/oshmem/shmem/man/man3/shmem_put128.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_put128.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_put32.3in b/oshmem/shmem/man/man3/shmem_put32.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_put32.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_put64.3in b/oshmem/shmem/man/man3/shmem_put64.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_put64.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_putmem.3in b/oshmem/shmem/man/man3/shmem_putmem.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_putmem.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_quiet.3in b/oshmem/shmem/man/man3/shmem_quiet.3in
new file mode 100644
index 0000000000..cd92ae1794
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_quiet.3in
@@ -0,0 +1,84 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_QUIET" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_quiet\fP(3)
+\- Waits for completion of all outstanding remote writes issued by a
+processing element (PE).
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_quiet(void);
+.Ve
+Fortran:
+.Vb
+CALL SHMEM_QUIET
+.Ve
+.SH DESCRIPTION
+
+shmem_quiet ensures ordering of put (remote write) operations. All put operations issued to
+any processing element (PE) prior to the call to shmem_quiet are guaranteed to be visible to
+all other PEs no later than any subsequent memory load or store, remote put or get, or
+synchronization operations that follow the call to shmem_quiet.
+.SH NOTES
+
+shmem_quiet is most useful as a way of ensuring ordering of delivery of several put
+operations. For example, you might use shmem_quiet to await delivery of a block of data
+before issuing another put, which sets a completion flag on another PE.
+.br
+shmem_quiet is not usually needed if \fIshmem_barrier_all\fP(3)
+or
+\fIshmem_barrier\fP(3)
+are called. The barrier routines all wait for the completion of
+outstanding remote writes (puts).
+.SH EXAMPLES
+
+.Vb
+PROGRAM COMPFLAG
+  INCLUDE "mpp/shmem.fh"
+
+  INTEGER FLAG_VAR, ARRAY(100), RECEIVER, SENDER
+  COMMON/FLAG/FLAG_VAR
+  COMMON/DATA/ARRAY
+  INTRINSIC MY_PE
+
+  FLAG_VAR = 0
+  CALL SHMEM_BARRIER_ALL ! wait for FLAG_VAR to be initialized
+  SENDER = 0                        ! PE 0 sends the data
+  RECEIVER = 1                      ! PE 1 receives the data
+
+  IF (MY_PE() .EQ. 0) THEN
+    ARRAY = 33
+    CALL SHMEM_PUT(ARRAY, ARRAY, 100, RECEIVER) ! start sending data
+    CALL SHMEM_QUIET                ! wait for delivery
+    CALL SHMEM_PUT(FLAG_VAR, 1, 1, RECEIVER) ! send completion flag
+  ELSE IF (MY_PE() .EQ. RECEIVER) THEN
+    CALL SHMEM_UDCFLUSH
+    CALL SHMEM_WAIT(FLAG_VAR, 0)
+    PRINT *,ARRAY                       ! ARRAY has been delivered
+  ENDIF
+END
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_barrier\fP(3),
+\fIshmem_barrier_all\fP(3),
+\fIshmem_fence\fP(3),
+\fIshmem_put\fP(3),
+\fIshmem_wait\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_set_cache_inv.3in b/oshmem/shmem/man/man3/shmem_set_cache_inv.3in
new file mode 100644
index 0000000000..4a6a361ef9
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_set_cache_inv.3in
@@ -0,0 +1 @@
+.so man3/shmem_udcflush.3
diff --git a/oshmem/shmem/man/man3/shmem_set_cache_line_inv.3in b/oshmem/shmem/man/man3/shmem_set_cache_line_inv.3in
new file mode 100644
index 0000000000..4a6a361ef9
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_set_cache_line_inv.3in
@@ -0,0 +1 @@
+.so man3/shmem_udcflush.3
diff --git a/oshmem/shmem/man/man3/shmem_set_lock.3in b/oshmem/shmem/man/man3/shmem_set_lock.3in
new file mode 100644
index 0000000000..750e83a21b
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_set_lock.3in
@@ -0,0 +1,78 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_LOCK" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_set_lock\fP(3),
+\fIshmem_clear_lock\fP(3),
+\fIshmem_test_lock\fP(3)
+\- Locks, releases, and tests a mutual exclusion memory lock.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_clear_lock(long *lock);
+
+void shmem_set_lock(long *lock);
+
+int shmem_test_lock(long *lock);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER lock, SHMEM_TEST_LOCK
+
+CALL SHMEM_CLEAR_LOCK(lock)
+
+CALL SHMEM_SET_LOCK(lock)
+
+I = SHMEM_TEST_LOCK(lock)
+.Ve
+.SH DESCRIPTION
+
+The shmem_set_lock routine sets a mutual exclusion lock after waiting for the lock to be
+freed by any other PE currently holding the lock. Waiting PEs are assured of getting the lock
+in a first\-come, first\-served manner.
+.PP
+The shmem_clear_lock routine releases a lock previously set by shmem_set_lock after
+ensuring that all local and remote stores initiated in the critical region are complete.
+.PP
+The shmem_test_lock function sets a mutual exclusion lock only if it is currently cleared.
+By using this function, a PE can avoid blocking on a set lock. If the lock is currently set, the
+function returns without waiting.
+These routines are appropriate for protecting a critical region from simultaneous update by
+multiple PEs.
+They accept the following arguments:
+.TP
+lock
+A symmetric data object that is a scalar variable or an array of length 1. This
+data object must be set to 0 on all processing elements (PEs) prior to the first use. lock must
+be of type integer. If you are using Fortran, it must be of default kind.
+.PP
+.SH NOTES
+
+The term symmetric data object is defined in \fIintro_shmem\fP(3)\&.
+.PP
+.SH RETURN VALUES
+
+The shmem_test_lock function returns 0 if the lock was originally cleared and this call
+was able to set the lock. A value of 1 is returned if the lock had been set and the call returned
+without waiting to set the lock.
+.PP
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_and_to_all.3in b/oshmem/shmem/man/man3/shmem_short_and_to_all.3in
new file mode 100644
index 0000000000..d9204fcfe2
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_and_to_all.3in
@@ -0,0 +1,206 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_AND" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_int_and_to_all\fP(3),
+\fIshmem_int4_and_to_all\fP(3),
+\fIshmem_int8_and_to_all\fP(3),
+\fIshmem_long_and_to_all\fP(3),
+\fIshmem_longlong_and_to_all\fP(3),
+\fIshmem_short_and_to_all\fP(3)
+\- Performs a bitwise AND operation on symmetric
+arrays over the active set of PEs.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_int_and_to_all(int *target, int *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  int *pWrk, long *pSync);
+
+void shmem_long_and_to_all(long *target, long *source,
+  int nreduce, int PE_start, int  logPE_stride, int PE_size,
+  long *pWrk, long *pSync);
+
+void shmem_longlong_and_to_all(long long *target,
+  long long *source, int nreduce, int PE_start, int logPE_stride,
+  int PE_size, long long *pWrk, long *pSync);
+
+void shmem_short_and_to_all(short *target, short *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  short *pWrk, long *pSync);
+
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE)
+INTEGER nreduce, PE_start, logPE_stride, PE_size
+
+CALL SHMEM_INT4_AND_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT8_AND_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+.Ve
+.PP
+.SH DESCRIPTION
+
+The shared memory (SHMEM) reduction routines compute one or more reductions across
+symmetric arrays on multiple virtual PEs. A reduction performs an associative binary
+operation across a set of values. For a list of other SHMEM reduction routines, see
+\fIintro_shmem\fP(3)\&.
+.PP
+The nreduce argument determines the number of separate reductions to perform. The source
+array on all PEs in the active set provides one element for each reduction. The results of the
+reductions are placed in the target array on all PEs in the active set. The active set is defined
+by the PE_start, logPE_stride, PE_size triplet.
+.PP
+The source and target arrays may be the same array, but they may not be overlapping arrays.
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The arguments are as follows:
+.TP
+target
+A symmetric array, of length nreduce elements, to receive the result of the
+reduction operations. The data type of target varies with the version of the reduction routine
+being called. When calling from C/C++, refer to the SYNOPSIS section for data type
+information. When calling from Fortran, the target data types are as follows:
+.RS
+.TP
+\fBshmem_int8_and_to_all\fP: Integer, with an element size of 8 bytes
+.TP
+\fBshmem_int4_and_to_all\fP: Integer, with an element size of 4 bytes
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric array, of length nreduce elements, that contains one element for
+each separate reduction operation. The source argument must have the same data type as
+target.
+.TP
+nreduce
+The number of elements in the target and source arrays. nreduce must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pWrk
+A symmetric work array. The pWrk argument must have the same data type as
+target. In C/C++, this contains max(nreduce/2 + 1,
+_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains
+max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync must be of type long and size
+_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync must be of type integer and size
+SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value.
+Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in
+C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
+the reduction routine.
+.PP
+The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all
+PEs in the active set. The same target and source arrays, and the same pWrk and pSync work
+arrays, must be passed to all PEs in the active set.
+.PP
+Before any PE calls a reduction routine, you must ensure that the following conditions exist
+(synchronization via a barrier or some other method is often needed to ensure this): The
+pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a
+collective SHMEM routine. The target array on all PEs in the active set is ready to accept the
+results of the reduction.
+.PP
+Upon return from a reduction routine, the following are true for the local PE: The target array
+is updated. The values in the pSync array are restored to the original values.
+.PP
+.SH NOTES
+
+The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&.
+All SHMEM reduction routines reset the values in pSync before they return, so a particular
+pSync buffer need only be initialized the first time it is used.
+.PP
+You must ensure that the pSync array is not being updated on any PE in the active set while
+any of the PEs participate in processing of a SHMEM reduction routine. Be careful to avoid the
+following situations: If the pSync array is initialized at run time, some type of
+synchronization is needed to ensure that all PEs in the working set have initialized pSync
+before any of them enter a SHMEM routine called with the pSync synchronization array. A
+pSync or pWrk array can be reused in a subsequent reduction routine call only if none of the
+PEs in the active set are still processing a prior reduction routine call that used the same
+pSync or pWrk arrays. In general, this can be assured only by doing some type of
+synchronization. However, in the special case of reduction routines being called with the
+same active set, you can allocate two pSync and pWrk arrays and alternate between them on
+successive calls.
+.PP
+.SH EXAMPLES
+
+\fBExample 1\fP:
+This Fortran example statically initializes the pSync array and finds the bitwise AND of the integer variable FOO across all even PEs.
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE)
+DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
+PARAMETER (NR=1)
+REAL PWRK(MAX(NR/2+1, SHMEM_REDUCE_MIN_WRKDATA_SIZE))
+INTEGER FOO, FOOAND
+COMMON /COM/ FOO, FOOAND, PWRK
+INTRINSIC MY_PE
+
+IF ( MOD(MY_PE(),2) .EQ. 0) THEN
+  CALL SHMEM_INT8_AND_TO_ALL(FOOAND, FOO, NR, 0, 1, N$PES/2,
+  & PWRK, PSYNC)
+  PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOAND
+ENDIF
+.Ve
+\fBExample 2\fP:
+Consider the following C call:
+.Vb
+shmem_int_and_to_all( target, source, 3, 0, 0, 8, pwrk, psync );
+.Ve
+The preceding call is more efficient than, but semantically equivalent to, the combination
+of the following calls:
+.Vb
+shmem_int_and_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8,
+  pwrk1, psync1);
+
+shmem_int_and_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8,
+  pwrk2, psync2);
+
+shmem_int_and_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8,
+  pwrk1, psync1);
+.Ve
+Note that two sets of pWrk and pSync arrays are used alternately because no
+synchronization is done between calls.
+.SH SEE ALSO
+
+\fIf90\fP(1),
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_g.3in b/oshmem/shmem/man/man3/shmem_short_g.3in
new file mode 100644
index 0000000000..d2bbc4ad8d
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_g.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_g.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_short_get.3in b/oshmem/shmem/man/man3/shmem_short_get.3in
new file mode 100644
index 0000000000..6d7c165d2e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_get.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_get.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_short_iget.3in b/oshmem/shmem/man/man3/shmem_short_iget.3in
new file mode 100644
index 0000000000..5538cd707b
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_iget.3in
@@ -0,0 +1,217 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_IGET" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_complex_iget\fP(3),
+\fIshmem_double_iget\fP(3),
+\fIshmem_float_iget\fP(3),
+\fIshmem_iget4\fP(3),
+\fIshmem_iget8\fP(3),
+\fIshmem_iget32\fP(3),
+\fIshmem_iget64\fP(3),
+\fIshmem_iget128\fP(3),
+\fIshmem_int_iget\fP(3),
+\fIshmem_integer_iget\fP(3),
+\fIshmem_logical_iget\fP(3),
+\fIshmem_long_iget\fP(3),
+\fIshmem_longdouble_iget\fP(3),
+\fIshmem_longlong_iget\fP(3),
+\fIshmem_real_iget\fP(3),
+\fIshmem_short_iget\fP(3)
+\- Transfers strided data from a specified processing element (PE)
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+void shmem_iget32(void *target, const void *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_iget64(void *target, const void *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_iget128(void *target, const void *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_int_iget(int *target, const int *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_double_iget(double *target, const double *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_float_iget(float *target, const float *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_long_iget(long *target, const long *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_longdouble_iget(long double *target,
+  const long double *source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_longlong_iget(long long *target,
+  const long long *source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_short_iget(short *target,
+  const short *source, ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER tst, sst, len, pe
+
+CALL SHMEM_COMPLEX_IGET(target, source, tst, sst, len,
+& pe)
+
+CALL SHMEM_DOUBLE_IGET(target, source, tst, sst, len,
+& pe)
+
+CALL SHMEM_IGET4(target, source, tst, sst, len, pe)
+
+CALL SHMEM_IGET8(target, source, tst, sst, len, pe)
+
+CALL SHMEM_IGET32(target, source, tst, sst, len, pe)
+
+CALL SHMEM_IGET64(target, source, tst, sst, len, pe)
+
+CALL SHMEM_IGET128(target, source, tst, sst, len, pe)
+
+CALL SHMEM_INTEGER_IGET(target, source, tst, sst, len,
+& pe)
+
+CALL SHMEM_LOGICAL_IGET(target, source, tst, sst, len,
+& pe)
+
+CALL SHMEM_REAL_IGET(target, source, tst, sst, len, pe)
+.Ve
+.SH DESCRIPTION
+
+The strided get routines retrieve array data available at address source on remote PE (pe).
+The elements of the \fBsource\fP
+array are separated by a stride \fBsst\fP\&.
+Once the data is received,
+it is stored at the local memory address \fBtarget\fP,
+separated by stride \fBtst\fP\&.
+The routines return
+when the data has been copied into the local \fBtarget\fP
+array.
+.PP
+The arguments are as follows:
+.TP
+target
+Array to be updated on the local PE.
+.TP
+source
+Array containing the data to be copied on the remote PE.
+.TP
+tst
+The stride between consecutive elements of the target array. The stride is scaled by
+the element size of the target array. A value of 1 indicates contiguous data. tst must be of
+type integer. If you are calling from Fortran, it must be a default integer value.
+.TP
+sst
+The stride between consecutive elements of the source array. The stride is scaled
+by the element size of the source array. A value of 1 indicates contiguous data. sst must be
+of type integer. If you are calling from Fortran, it must be a default integer value.
+.TP
+len
+Number of elements in the target and source arrays. len must be of type integer. If
+you are using Fortran, it must be a constant, variable, or array element of default integer
+type.
+.TP
+pe
+PE number of the remote PE. pe must be of type integer. If you are using Fortran, it
+must be a constant, variable, or array element of default integer type.
+.PP
+The target and source data objects must conform to typing constraints, which are as
+follows:
+.TP
+\fBshmem_iget32, shmem_iget4\fP: Any noncharacter type that has a storage size
+equal to 32 bits.
+.TP
+\fBshmem_iget64, shmem_iget8\fP: Any noncharacter type that has a storage size
+equal to 64 bits.
+.TP
+\fBshmem_iget128\fP: Any noncharacter type that has a storage size equal to
+128 bits.
+.TP
+\fBshmem_short_iget\fP: Elements of type short.
+.TP
+\fBshmem_int_iget\fP: Elements of type int.
+.TP
+\fBshmem_long_iget\fP: Elements of type long.
+.TP
+\fBshmem_longlong_iget\fP: Elements of type long long.
+.TP
+\fBshmem_float_iget\fP: Elements of type float.
+.TP
+\fBshmem_double_iget\fP: Elements of type double.
+.TP
+\fBshmem_longdouble_iget\fP: Elements of type long double.
+.TP
+\fBSHMEM_COMPLEX_IGET\fP: Elements of type complex of default size.
+.TP
+\fBSHMEM_DOUBLE_IGET\fP: (Fortran) Elements of type double precision.
+.TP
+\fBSHMEM_INTEGER_IGET\fP: Elements of type integer.
+.TP
+\fBSHMEM_LOGICAL_IGET\fP: Elements of type logical.
+.TP
+\fBSHMEM_REAL_IGET\fP: Elements of type real.
+.TP
+\fBshmem_longdouble_iget\fP: Elements of type long double.
+.TP
+\fBSHMEM_COMPLEX_IGET\fP: Elements of type complex of default size.
+.TP
+\fBSHMEM_DOUBLE_IGET\fP: (Fortran) Elements of type double precision.
+.TP
+\fBSHMEM_INTEGER_IGET\fP: Elements of type integer.
+.TP
+\fBSHMEM_LOGICAL_IGET\fP: Elements of type logical.
+.TP
+\fBSHMEM_REAL_IGET\fP: Elements of type real.
+.PP
+If you are using Fortran, data types must be of default size. For example, a real variable must
+be declared as REAL, REAL*4, or REAL(KIND=4).
+.PP
+.SH NOTES
+
+See \fIintro_shmem\fP(3)
+for a definition of the term remotely accessible.
+.PP
+.SH EXAMPLES
+
+The following simple example uses shmem_logical_iget in a Fortran program. Compile
+this example with the \-lsma compiler option.
+.Vb
+PROGRAM STRIDELOGICAL
+  LOGICAL SOURCE(10), TARGET(5)
+  SAVE SOURCE ! SAVE MAKES IT REMOTELY ACCESSIBLE
+  DATA SOURCE /.T.,.F.,.T.,.F.,.T.,.F.,.T.,.F.,.T.,.F./
+  DATA TARGET / 5*.F. /
+
+  CALL START_PES(2)
+  IF (MY_PE() .EQ. 0) THEN
+    CALL SHMEM_LOGICAL_IGET(TARGET, SOURCE, 1, 2, 5, 1)
+    PRINT*,'TARGET AFTER SHMEM_LOGICAL_IGET:',TARGET
+  ENDIF
+  CALL SHMEM_BARRIER_ALL
+END
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_get\fP(3),
+\fIshmem_quiet\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_iput.3in b/oshmem/shmem/man/man3/shmem_short_iput.3in
new file mode 100644
index 0000000000..6582c084ed
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_iput.3in
@@ -0,0 +1,220 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_IPUT" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_complex_iput\fP(3),
+\fIshmem_double_iput\fP(3),
+\fIshmem_float_iput\fP(3),
+\fIshmem_int_iput\fP(3),
+\fIshmem_integer_iput\fP(3),
+\fIshmem_iput4\fP(3),
+\fIshmem_iput8\fP(3),
+\fIshmem_iput32\fP(3),
+\fIshmem_iput64\fP(3),
+\fIshmem_iput128\fP(3),
+\fIshmem_logical_iput\fP(3),
+\fIshmem_long_iput\fP(3),
+\fIshmem_longdouble_iput\fP(3),
+\fIshmem_longlong_iput\fP(3),
+\fIshmem_real_iput\fP(3),
+\fIshmem_short_iput\fP(3)
+\- Transfer strided data to a specified processing element (PE).
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_double_iput(double *target, const double *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_float_iput(float *target, const float *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_int_iput(int *target, const int *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_iput32(void *target, const void *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_iput64(void *target, const void *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_iput128(void *target, const void *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_long_iput(long *target, const long *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+
+void shmem_longdouble_iput(long double *target,
+  const long double *source, ptrdiff_t tst, ptrdiff_t sst,
+  size_t len, int pe);
+
+void shmem_longlong_iput(long long *target,
+  const long long *source, ptrdiff_t tst, ptrdiff_t sst,
+  size_t len, int pe);
+
+void shmem_short_iput(short *target, const short *source,
+  ptrdiff_t tst, ptrdiff_t sst, size_t len, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER tst, sst, len, pe
+
+CALL SHMEM_COMPLEX_IPUT(target, source, tst, sst, len,
+& pe)
+
+CALL SHMEM_DOUBLE_IPUT(target, source, tst, sst, len,
+& pe)
+
+CALL SHMEM_INTEGER_IPUT(target, source, tst, sst, len,
+& pe)
+
+CALL SHMEM_IPUT4(target, source, tst, sst, len, pe)
+
+CALL SHMEM_IPUT8(target, source, tst, sst, len, pe)
+
+CALL SHMEM_IPUT32(target, source, tst, sst, len, pe)
+
+CALL SHMEM_IPUT64(target, source, tst, sst, len, pe)
+
+CALL SHMEM_IPUT128(target, source, tst, sst, len, pe)
+
+CALL SHMEM_LOGICAL_IPUT(target, source, tst, sst, len,
+& pe)
+
+CALL SHMEM_REAL_IPUT(target, source, tst, sst, len, pe)
+.Ve
+.PP
+.SH DESCRIPTION
+
+The shmem_iput routines read the elements of a local array (\fBsource\fP)
+and write them
+to a remote array (\fBtarget\fP)
+on the PE indicated by \fBpe\fP\&.
+These routines return
+when the data has been copied out of the source array on the local PE but not necessarily
+before the data has been delivered to the remote data object.
+.PP
+The arguments are as follows:
+.TP
+target
+Array to be updated on the remote PE. This data object must be remotely
+accessible.
+.TP
+source
+Array containing the data to be copied.
+.TP
+tst
+The stride between consecutive elements of the target array. The stride is scaled by
+the element size of the target array. A value of 1 indicates contiguous data. tst must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+sst
+The stride between consecutive elements of the source array. The stride is scaled
+by the element size of the source array. A value of 1 indicates contiguous data. sst must be
+of type integer. If you are using Fortran, it must be a default integer value.
+.TP
+len
+Number of elements in the target and source arrays. len must be of type integer. If
+you are using Fortran, it must be a constant, variable, or array element of default integer
+type.
+.TP
+pe
+PE number of the remote PE. pe must be of type integer. If you are using Fortran, it
+must be a constant, variable, or array element of default integer type.
+.PP
+The target and source data objects must conform to typing constraints, which are as follows:
+.PP
+.TP
+\fBshmem_iput32, shmem_iput4\fP: Any noncharacter type that has a storage size equal
+to 32 bits.
+.TP
+\fBshmem_iput64, shmem_iput8\fP: Any noncharacter type that has a storage size equal
+to 64 bits.
+.TP
+\fBshmem_iput128\fP: Any noncharacter type that has a storage size equal to 128 bits.
+.TP
+\fBshmem_short_iput\fP: Elements of type short.
+.TP
+\fBshmem_int_iput\fP: Elements of type int.
+.TP
+\fBshmem_long_iput\fP: Elements of type long.
+.TP
+\fBshmem_longlong_iput\fP: Elements of type long long.
+.TP
+\fBshmem_float_iput\fP: Elements of type float.
+.TP
+\fBshmem_double_iput\fP: Elements of type double.
+.TP
+\fBshmem_longdouble_iput\fP: Elements of type long double.
+.TP
+\fBSHMEM_COMPLEX_IPUT\fP: Elements of type complex of default size.
+.TP
+\fBSHMEM_DOUBLE_IPUT\fP: (Fortran) Elements of type double precision.
+.TP
+\fBSHMEM_INTEGER_IPUT\fP: Elements of type integer.
+.TP
+\fBSHMEM_LOGICAL_IPUT\fP: Elements of type logical.
+.TP
+\fBSHMEM_REAL_IPUT\fP: Elements of type real.
+.TP
+\fBSHMEM_LOGICAL_IPUT\fP: Elements of type logical.
+.TP
+\fBSHMEM_REAL_IPUT\fP: Elements of type real.
+.PP
+If you are using Fortran, data types must be of default size. For example, a real variable must
+be declared as REAL, REAL*4 or REAL(KIND=4).
+.PP
+.SH NOTES
+
+See \fIintro_shmem\fP(3)
+for a definition of the term remotely accessible.
+.PP
+.SH EXAMPLES
+
+Consider the following simple shmem_short_iput example for C/C++ programs.
+.Vb
+#include <mpp/shmem.h>
+
+main()
+{
+  short source[10] = { 1, 2, 3, 4, 5,
+  6, 7, 8, 9, 10 };
+  static short target[10];
+
+  start_pes(2);
+  if (_my_pe() == 0) {
+    /* put 5 strided elements into target on PE 1 */
+    shmem_short_iput(target, source, 1, 2, 5, 1);
+  }
+  shmem_barrier_all(); /* sync sender and receiver */
+  if (_my_pe() == 1) {
+    shmem_udcflush(); /* not required on IRIX systems */
+    printf("target on PE %d is %d %d %d %d %d\en", _my_pe(),
+    (int)target[0], (int)target[1], (int)target[2],
+    (int)target[3], (int)target[4] );
+  }
+  shmem_barrier_all(); /* sync before exiting */
+}
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_iget\fP(3),
+\fIshmem_put\fP(3),
+\fIshmem_quiet\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_max_to_all.3in b/oshmem/shmem/man/man3/shmem_short_max_to_all.3in
new file mode 100644
index 0000000000..760dd45eec
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_max_to_all.3in
@@ -0,0 +1,238 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_MAX" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_double_max_to_all\fP(3),
+\fIshmem_float_max_to_all\fP(3),
+\fIshmem_int_max_to_all\fP(3),
+\fIshmem_int4_max_to_all\fP(3),
+\fIshmem_int8_max_to_all\fP(3),
+\fIshmem_long_max_to_all\fP(3),
+\fIshmem_longdouble_max_to_all\fP(3),
+\fIshmem_longlong_max_to_all\fP(3),
+\fIshmem_real4_max_to_all\fP(3),
+\fIshmem_real8_max_to_all\fP(3),
+\fIshmem_real16_max_to_all\fP(3),
+\fIshmem_short_max_to_all\fP(3)
+\- Performs a maximum function reduction across a set of processing elements (PEs).
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_double_max_to_all(double *target, double *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  double *pWrk, long *pSync);
+
+void shmem_float_max_to_all(float *target, float *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  float *pWrk, long *pSync);
+
+void shmem_int_max_to_all(int *target, int *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  int *pWrk, long *pSync);
+
+void shmem_long_max_to_all(long *target, long *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  long *pWrk, long *pSync);
+
+void shmem_longdouble_max_to_all(long double *target,
+  long double *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, long double *pWrk, long *pSync);
+
+void shmem_longlong_max_to_all(long long *target,
+  long long *source, int nreduce,  int PE_start,
+  int logPE_stride, int PE_size, long long *pWrk, long *pSync);
+
+void shmem_short_max_to_all(short *target, short *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  short *pWrk, long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE)
+
+INTEGER nreduce, PE_start, logPE_stride, PE_size
+
+CALL SHMEM_INT4_MAX_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT8_MAX_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL4_MAX_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL8_MAX_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL16_MAX_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+.Ve
+.SH DESCRIPTION
+
+The shared memory (SHMEM) reduction routines compute one or more reductions across
+symmetric arrays on multiple virtual PEs. A reduction performs an associative binary
+operation across a set of values. For a list of other SHMEM reduction routines, see
+\fIintro_shmem\fP(3)\&.
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The nreduce argument determines the number of separate reductions to perform. The source
+array on all PEs in the active set provides one element for each reduction. The results of the
+reductions are placed in the target array on all PEs in the active set. The active set is defined
+by the PE_start, logPE_stride, PE_size triplet.
+.PP
+The source and target arrays may be the same array, but they may not be overlapping arrays.
+.PP
+The arguments are as follows:
+.TP
+target
+A symmetric array of length nreduce elements to receive the results of the
+reduction operations. The data type of target varies with the version of the reduction routine
+being called. When calling from C, refer to the SYNOPSIS section for data type information.
+.PP
+When calling from Fortran, the target data types are as follows:
+.RS
+.TP
+\fBshmem_real8_max_to_all\fP: Real, with an element size of
+8 bytes.
+.TP
+\fBshmem_int4_max_to_all\fP: Integer, with an element size of 4 bytes.
+.TP
+\fBshmem_int8_max_to_all\fP: Integer, with an element size of 8 bytes.
+.TP
+\fBshmem_real4_max_to_all\fP: Real, with an element size of 4 bytes.
+.TP
+\fBshmem_real16_max_to_all\fP: Real, with an element size of 16 bytes.
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric array of length nreduce elements that contains one element for
+each separate reduction operation. The source argument must have the same data type as
+target.
+.TP
+nreduce
+The number of elements in the target and source arrays. nreduce must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pWrk
+A symmetric work array. The pWrk argument must have the same data type as
+target. In C/C++, this contains max(nreduce/2 + 1,
+_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains
+max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync is of type long and size
+_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size
+SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value.
+Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in
+C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
+the reduction routine.
+.PP
+The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all
+PEs in the active set. The same target and source arrays, and the same pWrk and pSync work
+arrays, must be passed to all PEs in the active set.
+.PP
+Before any PE calls a reduction routine, you must ensure that the following conditions exist
+(synchronization via a barrier or some other method is often needed to ensure this): The
+pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a
+collective SHMEM routine. The target array on all PEs in the active set is ready to accept the
+results of the reduction.
+.PP
+Upon return from a reduction routine, the following are true for the local PE: The target array
+is updated. The values in the pSync array are restored to the original values.
+.PP
+.SH NOTES
+
+The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&.
+All SHMEM reduction routines reset the values in pSync before they return, so a particular
+pSync buffer need only be initialized the first time it is used.
+.PP
+You must ensure that the pSync array is not being updated on any PE in the active set while
+any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the
+following situations: If the pSync array is initialized at run time, some type of
+synchronization is needed to ensure that all PEs in the working set have initialized pSync
+before any of them enter a SHMEM routine called with the pSync synchronization array. A
+pSync or pWrk array can be reused in a subsequent reduction routine call only if none
+of the PEs in the active set are still processing a prior reduction routine call that used the
+same pSync or pWrk arrays.
+.PP
+In general, this can be assured only by doing some type of synchronization. However, in the
+special case of reduction routines being called with the same active set, you can allocate two
+pSync and pWrk arrays and alternate between them on successive calls.
+.PP
+.SH EXAMPLES
+
+\fBExample 1:\fP
+This Fortran example statically initializes the pSync array and finds the
+maximum value of real variable FOO across all even PEs.
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE)
+DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
+PARAMETER (NR=1)
+REAL FOO, FOOMAX, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE))
+COMMON /COM/ FOO, FOOMAX, PWRK
+INTRINSIC MY_PE
+
+IF ( MOD(MY_PE(),2) .EQ. 0) THEN
+  CALL SHMEM_REAL8_MAX_TO_ALL(FOOMAX, FOO, NR, 0, 1, N$PES/2,
+  & PWRK, PSYNC)
+  PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOMAX
+ENDIF
+.Ve
+\fBExample 2:\fP
+Consider the following C/C++ call:
+.Vb
+shmem_int_max_to_all( target, source, 3, 0, 0, 8, pwrk, psync );
+.Ve
+The preceding call is more efficient than, but semantically equivalent to, the combination
+of the following calls:
+.Vb
+shmem_int_max_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8,
+  pwrk1, psync1);
+shmem_int_max_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8,
+  pwrk2, psync2);
+shmem_int_max_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8,
+  pwrk1, psync1);
+.Ve
+Note that two sets of pWrk and pSync arrays are used alternately because no synchronization
+is done between calls.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_min_to_all.3in b/oshmem/shmem/man/man3/shmem_short_min_to_all.3in
new file mode 100644
index 0000000000..8bdaae4e9b
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_min_to_all.3in
@@ -0,0 +1,234 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_MIN" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_double_min_to_all\fP(3),
+\fIshmem_float_min_to_all\fP(3),
+\fIshmem_int_min_to_all\fP(3),
+\fIshmem_int4_min_to_all\fP(3),
+\fIshmem_int8_min_to_all\fP(3),
+\fIshmem_long_min_to_all\fP(3),
+\fIshmem_longdouble_min_to_all\fP(3),
+\fIshmem_longlong_min_to_all\fP(3),
+\fIshmem_real4_min_to_all\fP(3),
+\fIshmem_real8_min_to_all\fP(3),
+\fIshmem_real16_min_to_all\fP(3),
+\fIshmem_short_min_to_all\fP(3)
+\- Performs a minimum function reduction across a set of processing elements (PEs)
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_double_min_to_all(double *target, double *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  double *pWrk, long *pSync);
+
+void shmem_float_min_to_all(float *target, float *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  float *pWrk, long *pSync);
+
+void shmem_int_min_to_all(int *target, int *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  int *pWrk, long *pSync);
+
+void shmem_long_min_to_all(long *target, long *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  long *pWrk, long *pSync);
+
+void shmem_longdouble_min_to_all(long double *target,
+  long double *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, long double *pWrk,
+  long *pSync);
+
+void shmem_longlong_min_to_all(long long *target,
+  long long *source, int nreduce, int PE_start, int logPE_stride,
+  int PE_size, long long *pWrk, long *pSync);
+
+void shmem_short_min_to_all(short *target, short *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  short *pWrk, long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE)
+INTEGER nreduce, PE_start, logPE_stride, PE_size
+
+CALL SHMEM_INT4_MIN_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT8_MIN_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL4_MIN_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL8_MIN_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL16_MIN_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+.Ve
+.SH DESCRIPTION
+
+The shared memory (SHMEM) reduction routines compute one or more reductions across
+symmetric arrays on multiple virtual PEs. A reduction performs an associative binary
+operation across a set of values. For a list of other SHMEM reduction routines, see
+\fIintro_shmem\fP(3)\&.
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The nreduce argument determines the number of separate reductions to perform. The source
+array on all PEs in the active set provides one element for each reduction. The results of the
+reductions are placed in the target array on all PEs in the active set. The active set is defined
+by the PE_start, logPE_stride, PE_size triplet.
+.PP
+The source and target arrays may be the same array, but they may not be overlapping arrays.
+.PP
+The arguments are as follows:
+.TP
+target
+A symmetric array of length nreduce elements to receive the results of the
+reduction operations. The data type of target varies with the version of the reduction routine
+being called. When calling from C/C++, refer to the SYNOPSIS section for data type
+information. When calling from Fortran, the target data types are as follows:
+.RS
+.TP
+\fBshmem_int4_min_to_all\fP: Integer, with an element size of 4 bytes
+.TP
+\fBshmem_int8_min_to_all\fP: Integer, with an element size of 8 bytes
+.TP
+\fBshmem_real4_min_to_all\fP: Real, with an element size of 4 bytes
+.TP
+\fBshmem_real8_min_to_all\fP: Real, with an element size of 8 bytes
+.TP
+\fBshmem_real16_min_to_all\fP: Real, with an element size of 16 bytes
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric array, of length nreduce elements, that contains one element for each
+separate reduction operation. The source argument must have the same data type as target.
+.TP
+nreduce
+The number of elements in the target and source arrays. nreduce must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pWrk
+A symmetric work array. The pWrk argument must have the same data type as
+target. In C/C++, this contains max(nreduce/2 + 1,
+_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains
+max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync is of type long and size
+_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size
+SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value.
+Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in
+C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
+the reduction routine.
+.PP
+The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all
+PEs in the active set. The same target and source arrays, and the same pWrk and pSync work
+arrays, must be passed to all PEs in the active set.
+.PP
+Before any PE calls a reduction routine, you must ensure that the following conditions exist
+(synchronization via a barrier or some other method is often needed to ensure this): The
+pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a
+collective SHMEM routine. The target array on all PEs in the active set is ready to accept the
+results of the reduction.
+.PP
+Upon return from a reduction routine, the following are true for the local PE: The target array
+is updated. The values in the pSync array are restored to the original values.
+.PP
+.SH NOTES
+
+The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&.
+All SHMEM reduction routines reset the values in pSync before they return, so a particular
+pSync buffer need only be initialized the first time it is used.
+.PP
+You must ensure that the pSync array is not being updated on any PE in the active set while
+any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the
+following situations: If the pSync array is initialized at run time, some type of
+synchronization is needed to ensure that all PEs in the active set have initialized pSync
+before any of them enter a SHMEM routine called with the pSync synchronization array. A
+pSync or pWrk array can be reused in a subsequent reduction routine call only if none
+of the PEs in the active set are still processing a prior reduction routine call that used the
+same pSync or pWrk arrays. In general, this can be assured only by doing some type of
+synchronization. However, in the special case of reduction routines being called with the
+same active set, you can allocate two pSync and pWrk arrays and alternate between them on
+successive calls.
+.PP
+.SH EXAMPLES
+
+\fBExample 1:\fP
+This Fortran example statically initializes the pSync array and finds the
+minimum value of real variable FOO across all the even PEs.
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE)
+DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
+PARAMETER (NR=1)
+REAL FOO, FOOMIN, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE))
+COMMON /COM/ FOO, FOOMIN, PWRK
+INTRINSIC MY_PE
+
+IF ( MOD(MY_PE(),2) .EQ. 0) THEN
+  CALL SHMEM_REAL8_MIN_TO_ALL(FOOMIN, FOO, NR, 0, 1, N$PES/2,
+  & PWRK, PSYNC)
+  PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOMIN
+ENDIF
+.Ve
+\fBExample 2:\fP
+Consider the following C/C++ call:
+.Vb
+shmem_int_min_to_all( target, source, 3, 0, 0, 8, pwrk, psync );
+.Ve
+The preceding call is more efficient than, but semantically equivalent to, the combination of
+the following calls:
+.Vb
+shmem_int_min_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8,
+  pwrk1, psync1);
+shmem_int_min_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8,
+  pwrk2, psync2);
+shmem_int_min_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8,
+  pwrk1, psync1);
+.Ve
+Note that two sets of pWrk and pSync arrays are used alternately because no synchronization
+is done between calls.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_or_to_all.3in b/oshmem/shmem/man/man3/shmem_short_or_to_all.3in
new file mode 100644
index 0000000000..77ebef6889
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_or_to_all.3in
@@ -0,0 +1,202 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_OR" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_int_or_to_all\fP(3),
+\fIshmem_int4_or_to_all\fP(3),
+\fIshmem_int8_or_to_all\fP(3),
+\fIshmem_long_or_to_all\fP(3),
+\fIshmem_longlong_or_to_all\fP(3),
+\fIshmem_short_or_to_all\fP(3)
+\- Performs a bitwise OR function reduction across a set of processing elements (PEs)
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_int_or_to_all(int *target, int *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  int *pWrk, long *pSync);
+
+void shmem_long_or_to_all(long *target, long *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  long *pWrk, long *pSync);
+
+void shmem_longlong_or_to_all(long long *target,
+  long long *source, int nreduce, int PE_start, int logPE_stride,
+  int PE_size, long long *pWrk, long *pSync);
+
+void shmem_short_or_to_all(short *target, short *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  short *pWrk, long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE)
+INTEGER nreduce, PE_start, logPE_stride, PE_size
+
+CALL SHMEM_INT4_OR_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT8_OR_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+.Ve
+.SH DESCRIPTION
+
+The shared memory (SHMEM) reduction routines compute one or more reductions across
+symmetric arrays on multiple virtual PEs. A reduction performs an associative binary
+operation across a set of values. For a list of other SHMEM reduction routines, see
+\fIintro_shmem\fP(3)\&.
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The nreduce argument determines the number of separate reductions to perform. The source
+array on all PEs in the active set provides one element for each reduction. The results of the
+reductions are placed in the target array on all PEs in the active set. The active set is defined
+by the PE_start, logPE_stride, PE_size triplet.
+.PP
+The source and target arrays may be the same array, but they may not be overlapping arrays.
+.PP
+The arguments are as follows:
+.TP
+target
+A symmetric array of length nreduce elements to receive the results of the
+reduction operations. The data type of target varies with the version of the reduction routine
+being called. When calling from C/C++, refer to the SYNOPSIS section for data type
+information. When calling from Fortran, the target data types are as follows:
+.RS
+.TP
+\fBshmem_int8_or_to_all\fP Integer, with an element size of 8 bytes.
+.TP
+\fBshmem_int4_or_to_all\fP Integer, with an element size of 4 bytes.
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric array, of length nreduce elements, that contains one element for
+each separate reduction operation. The source argument must have the same data type as
+target.
+.TP
+nreduce
+The number of elements in the target and source arrays. nreduce must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pWrk
+A symmetric work array. The pWrk argument must have the same data type as
+target. In C/C++, this contains max(nreduce/2 + 1,
+_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains
+max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync is of type long and size
+_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size
+SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value.
+Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in
+C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
+the reduction routine.
+.PP
+The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on
+all PEs in the active set. The same target and source arrays, and the same pWrk and pSync
+work arrays, must be passed to all PEs in the active set.
+.PP
+Before any PE calls a reduction routine, you must ensure that the following conditions exist
+(synchronization via a barrier or some other method is often needed to ensure this): The
+pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a
+collective SHMEM routine. The target array on all PEs in the active set is ready to accept the
+results of the reduction.
+.PP
+Upon return from a reduction routine, the following are true: The target array is updated. The
+values in the pSync array are restored to the original values.
+.PP
+.SH NOTES
+
+The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&.
+All SHMEM reduction routines reset the values in pSync before they return, so a particular
+pSync buffer need only be initialized the first time it is used.
+.PP
+You must ensure that the pSync array is not being updated on any PE in the active set while
+any of the PEs participate in processing of a SHMEM reduction routine. Be careful to avoid
+these situations: If the pSync array is initialized at run time, some type of synchronization is
+needed to ensure that all PEs in the active set have initialized pSync before any of them
+enter a SHMEM routine called with the pSync synchronization array. A pSync or pWrk array
+can be reused in a subsequent reduction routine call only if none of the PEs in the active set
+are still processing a prior reduction routine call that used the same pSync or pWrk arrays. In
+general, this can be assured only by doing some type of synchronization. However, in the
+special case of reduction routines being called with the same active set, you can allocate two
+pSync and pWrk arrays and alternate between them on successive calls.
+.PP
+.SH EXAMPLES
+
+\fBExample 1:\fP
+This Fortran example statically initializes the pSync array and finds the
+bitwise OR of the integer variable FOO across all even PEs.
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE)
+DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
+PARAMETER (NR=1)
+INTEGER PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE))
+INTEGER FOO, FOOOR
+COMMON /COM/ FOO, FOOOR, PWRK
+INTRINSIC MY_PE
+
+IF ( MOD(MY_PE(),2) .EQ. 0) THEN
+  CALL SHMEM_INT8_OR_TO_ALL(FOOOR, FOO, NR, 0, 1, N$PES/2,
+  & PWRK, PSYNC)
+  PRINT *,'Result on PE ',MY_PE(),' is ',FOOOR
+ENDIF
+.Ve
+\fBExample 2:\fP
+Consider the following C/C++ call:
+.Vb
+shmem_int_or_to_all( target, source, 3, 0, 0, 8, pwrk, psync );
+.Ve
+The preceding call is more efficient than, but semantically equivalent to, the combination of
+the following calls:
+.Vb
+shmem_int_or_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8,
+  pwrk1, psync1);
+shmem_int_or_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8,
+  pwrk2, psync2);
+shmem_int_or_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8,
+  pwrk1, psync1);
+.Ve
+Note that two sets of pWrk and pSync arrays are used alternately because no synchronization
+is done between calls.
+.PP
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_p.3in b/oshmem/shmem/man/man3/shmem_short_p.3in
new file mode 100644
index 0000000000..c08d60a543
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_p.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_p.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_short_prod_to_all.3in b/oshmem/shmem/man/man3/shmem_short_prod_to_all.3in
new file mode 100644
index 0000000000..b7544a9af8
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_prod_to_all.3in
@@ -0,0 +1,259 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_PROD" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_comp4_prod_to_all\fP(3),
+\fIshmem_comp8_prod_to_all\fP(3),
+\fIshmem_complexd_prod_to_all\fP(3),
+\fIshmem_complexf_prod_to_all\fP(3),
+\fIshmem_double_prod_to_all\fP(3),
+\fIshmem_float_prod_to_all\fP(3),
+\fIshmem_int_prod_to_all\fP(3),
+\fIshmem_int4_prod_to_all\fP(3),
+\fIshmem_int8_prod_to_all\fP(3),
+\fIshmem_long_prod_to_all\fP(3),
+\fIshmem_longdouble_prod_to_all\fP(3),
+\fIshmem_longlong_prod_to_all\fP(3),
+\fIshmem_real8_prod_to_all\fP(3),
+\fIshmem_real16_prod_to_all\fP(3),
+\fIshmem_real4_prod_to_all\fP(3),
+\fIshmem_short_prod_to_all\fP(3)
+\- Performs
+a product reduction across a set of processing elements (PEs)
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_complexd_prod_to_all(double complex *target,
+  double complex *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, double complex *pWrk,
+  long *pSync);
+
+void shmem_complexf_prod_to_all(float complex *target,
+  float complex *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, float complex *pWrk,
+  long *pSync);
+
+void shmem_double_prod_to_all(double *target, double *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  double *pWrk, long *pSync);
+
+void shmem_float_prod_to_all(float *target, float *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  float *pWrk, long *pSync);
+
+void shmem_int_prod_to_all(int *target, int *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  int *pWrk, long *pSync);
+
+void shmem_long_prod_to_all(long *target, long *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  long *pWrk, long *pSync);
+
+void shmem_longdouble_prod_to_all(long double *target,
+  long double *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, long double *pWrk,
+  long *pSync);
+
+void shmem_longlong_prod_to_all(long long *target,
+  long long *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, long long *pWrk,
+  long *pSync);
+
+void shmem_short_prod_to_all(short *target, short *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  short *pWrk, long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE)
+INTEGER nreduce, PE_start, logPE_stride, PE_size
+
+CALL SHMEM_COMP4_PROD_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_COMP8_PROD_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT4_PROD_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT8_PROD_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL4_PROD_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL8_PROD_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL16_PROD_TO_ALL(target, source, nreduce, PE_start,
+& logPE_stride, PE_size, pWrk, pSync)
+.Ve
+.SH DESCRIPTION
+
+The shared memory (SHMEM) reduction routines compute one or more reductions across
+symmetric arrays on multiple virtual PEs. A reduction performs an associative binary
+operation across a set of values. For a list of other SHMEM reduction routines, see
+\fIintro_shmem\fP(3)\&.
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The nreduce argument determines the number of separate reductions to perform. The source
+array on all PEs in the active set provides one element for each reduction. The results of the
+reductions are placed in the target array on all PEs in the active set. The active set is defined
+by the PE_start, logPE_stride, PE_size triplet.
+.PP
+The source and target arrays may be the same array, but they may not be overlapping arrays.
+.PP
+The arguments are as follows:
+.TP
+target
+A symmetric array of length nreduce elements to receive the results of the
+reduction operations. The data type of target varies with the version of the reduction routine
+being called and the language used. When calling from C/C++, refer to the SYNOPSIS section
+for data type information. When calling from Fortran, the target data types are as follows:
+.RS
+.TP
+\fBshmem_comp4_prod_to_all\fP: Complex, with an element size equal to two
+4\-byte real values.
+.TP
+\fBshmem_comp8_prod_to_all\fP: Complex, with an element size equal to two
+8\-byte real values.
+.TP
+\fBshmem_int4_prod_to_all\fP: Integer, with an element size of 4 bytes
+.TP
+\fBshmem_int8_prod_to_all\fP: Integer, with an element size of 8 bytes
+.TP
+\fBshmem_real4_prod_to_all\fP: Real, with an element size of 4 bytes
+.TP
+\fBshmem_real8_prod_to_all\fP: Real, with an element size of 8 bytes
+.TP
+\fBshmem_real16_prod_to_all\fP: Real, with an element size of 16 bytes
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric array, of length nreduce elements, that contains one element for
+each separate reduction operation. The source argument must have the same data type as
+target.
+.TP
+nreduce
+The number of elements in the target and source arrays. nreduce must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pWrk
+A symmetric work array. The pWrk argument must have the same data type as
+target. In C/C++, this contains max(nreduce/2 + 1,
+_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains
+max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync is of type long and size
+_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size
+SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value.
+Before any of the PEs in the active set enter the reduction routine, every element of this array
+must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++) or
+SHMEM_SYNC_VALUE (in Fortran).
+.PP
+The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all
+PEs in the active set. The same target and source arrays, and the same pWrk and pSync work
+arrays, must be passed to all PEs in the active set. Before any PE calls a reduction routine, you
+must ensure that the following conditions exist (synchronization via a barrier or some
+other method is often needed to ensure this): The pWrk and pSync arrays on all PEs in the
+active set are not still in use from a prior call to a collective SHMEM routine. The target array
+on all PEs in the active set is ready to accept the results of the reduction.
+.PP
+Upon return from a reduction routine, the following are true for the local PE: The target array
+is updated. The values in the pSync array are restored to the original values.
+.SH NOTES
+
+The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&.
+All SHMEM reduction routines reset the values in pSync before they return, so a particular
+pSync buffer need only be initialized the first time it is used.
+.PP
+You must ensure that the pSync array is not being updated on any PE in the active set while
+any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the
+following situations: If the pSync array is initialized at run time, some type of
+synchronization is needed to ensure that all PEs in the active set have initialized pSync
+before any of them enter a SHMEM routine called with the pSync synchronization array. A
+pSync or pWrk array can be reused in a subsequent reduction routine call only if none of the
+PEs in the active set are still processing a prior reduction routine call that used the same
+pSync or pWrk arrays. In general, this can be assured only by doing some type of
+synchronization. However, in the special case of reduction routines being called with the
+same active set, you can allocate two pSync and pWrk arrays and alternate between them on
+successive calls.
+.SH EXAMPLES
+
+\fBExample 1:\fP
+This Fortran example statically initializes the pSync array and finds the
+product of the real variable FOO across all the even PEs.
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE)
+DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
+PARAMETER (NR=1)
+REAL FOO, FOOPROD, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE))
+COMMON /COM/ FOO, FOOPROD, PWRK
+INTRINSIC MY_PE
+
+IF ( MOD(MY_PE(),2) .EQ. 0) THEN
+  CALL SHMEM_REAL8_PROD_TO_ALL(FOOPROD, FOO, NR, 0, 1, N$PES/2,
+  & PWRK, PSYNC)
+  PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOPROD
+ENDIF
+.Ve
+\fBExample 2:\fP
+Consider the following C/C++ call:
+.Vb
+shmem_short_prod_to_all(target, source, 3, 0, 0, 8, pwrk, psync);
+.Ve
+The preceding call is more efficient than, but semantically equivalent to, the combination of
+the following calls:
+.Vb
+shmem_short_prod_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8,
+  pwrk1, psync1);
+shmem_short_prod_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8,
+  pwrk2, psync2);
+shmem_short_prod_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8,
+  pwrk1, psync1);
+.Ve
+Note that two sets of pWrk and pSync arrays are used alternately because no synchronization
+is done between calls.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_put.3in b/oshmem/shmem/man/man3/shmem_short_put.3in
new file mode 100644
index 0000000000..e3ca73d483
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_put.3in
@@ -0,0 +1 @@
+.so man3/shmem_char_put.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_short_sum_to_all.3in b/oshmem/shmem/man/man3/shmem_short_sum_to_all.3in
new file mode 100644
index 0000000000..3467a882fe
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_sum_to_all.3in
@@ -0,0 +1,281 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_SUM" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_comp4_sum_to_all\fP(3),
+\fIshmem_comp8_sum_to_all\fP(3),
+\fIshmem_complexd_sum_to_all\fP(3),
+\fIshmem_complexf_sum_to_all\fP(3),
+\fIshmem_double_sum_to_all\fP(3),
+\fIshmem_float_sum_to_all\fP(3),
+\fIshmem_int_sum_to_all\fP(3),
+\fIshmem_int4_sum_to_all\fP(3),
+\fIshmem_int8_sum_to_all\fP(3),
+\fIshmem_long_sum_to_all\fP(3),
+\fIshmem_longdouble_sum_to_all\fP(3),
+\fIshmem_longlong_sum_to_all\fP(3),
+\fIshmem_real4_sum_to_all\fP(3),
+\fIshmem_real8_sum_to_all\fP(3),
+\fIshmem_real16_sum_to_all\fP(3),
+\fIshmem_short_sum_to_all\fP(3)
+\- Performs
+a sum reduction across a set of processing elements (PEs)
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_complexd_sum_to_all(double complex *target,
+  double complex *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, double complex *pWrk,
+  long *pSync);
+
+void shmem_complexf_sum_to_all(float complex *target,
+  float complex *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, float complex *pWrk,
+  long *pSync);
+
+void shmem_double_sum_to_all(double *target,
+  double *source, int nreduce, int PE_start, int logPE_stride,
+  int PE_size, double *pWrk, long *pSync);
+
+void shmem_float_sum_to_all(float *target, float *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  float *pWrk, long *pSync);
+
+void shmem_int_sum_to_all(int *target, int *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  int *pWrk, long *pSync);
+
+void shmem_long_sum_to_all(long *target, long *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  long *pWrk, long *pSync);
+
+void shmem_longdouble_sum_to_all(long double *target,
+  long double *source, int nreduce, int PE_start, int
+  logPE_stride, int PE_size, long double *pWrk, long *pSync);
+
+void shmem_longlong_sum_to_all(long long *target,
+  long long *source, int nreduce, int PE_start,
+  int logPE_stride, int PE_size, long long *pWrk,
+  long *pSync);
+
+void shmem_short_sum_to_all(short *target, short *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  short *pWrk, long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE)
+INTEGER nreduce, PE_start, logPE_stride, PE_size
+
+CALL SHMEM_COMP4_SUM_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_COMP8_SUM_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT4_SUM_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT8_SUM_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL4_SUM_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL8_SUM_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_REAL16_SUM_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+.Ve
+.SH DESCRIPTION
+
+The shared memory (SHMEM) reduction routines compute one or more reductions across
+symmetric arrays on multiple virtual PEs. A reduction performs an associative binary
+operation across a set of values. For a list of other SHMEM reduction routines, see
+\fIintro_shmem\fP(3)\&.
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The nreduce argument determines the number of separate reductions to perform. The source
+array on all PEs in the active set provides one element for each reduction. The results of the
+reductions are placed in the target array on all PEs in the active set. The active set is defined
+by the PE_start, logPE_stride, PE_size triplet.
+.PP
+The source and target arrays may be the same array, but they may not be overlapping arrays.
+.PP
+The arguments are as follows:
+.TP
+target
+The remotely accessible integer data object to be updated on the remote PE. If
+you are using C/C++, the type of target should match that implied in the SYNOPSIS section.
+If you are using the Fortran compiler, it must be of type integer with an element size of 4
+bytes for SHMEM_INT4_ADD and 8 bytes for SHMEM_INT8_ADD.
+.TP
+value
+The value to be atomically added to target. If you are using C/C++, the type of
+value should match that implied in the SYNOPSIS section. If you are using Fortran, it must be
+of type integer with an element size of target.
+.TP
+pe
+An integer that indicates the PE number upon which target is to be updated. If you
+are using Fortran, it must be a default integer value.
+.TP
+target
+A symmetric array of length nreduce elements to receive the results of the
+reduction operations.
+.br
+The data type of target varies with the version of the reduction routine being called and the
+language used. When calling from C/C++, refer to the SYNOPSIS section for data type
+information. When calling from Fortran, the target data types are as follows:
+.RS
+.TP
+\fBshmem_comp4_sum_to_all:\fP COMPLEX(KIND=4).
+.TP
+\fBshmem_comp8_sum_to_all:\fP Complex. If you are using Fortran, it must be
+a default complex value.
+.TP
+\fBshmem_int4_sum_to_all:\fP INTEGER(KIND=4).
+.TP
+\fBshmem_int8_sum_to_all:\fP Integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+\fBshmem_real4_sum_to_all:\fP REAL(KIND=4).
+.TP
+\fBshmem_real8_sum_to_all:\fP Real. If you are using Fortran, it must be a
+default real value.
+.TP
+\fBshmem_real16_sum_to_all:\fP Real. If you are using Fortran, it must be a
+default real value.
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric array, of length nreduce elements, that contains one element for
+each separate reduction operation. The source argument must have the same data type as
+target.
+.TP
+nreduce
+The number of elements in the target and source arrays. nreduce must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pWrk
+A symmetric work array. The pWrk argument must have the same data type as
+target. In C/C++, this contains max(nreduce/2 + 1,
+_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains
+max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync is of type long and size
+_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size
+SHMEM_REDUCE_SYNC_SIZE. It must be a default integer value. Every element of this array
+must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++) or
+SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter the reduction
+routine.
+.PP
+The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all
+PEs in the active set. The same target and source arrays, and the same pWrk and pSync work
+arrays, must be passed to all PEs in the active set.
+.PP
+Before any PE calls a reduction routine, you must ensure that the following conditions exist
+(synchronization via a barrier or some other method is often needed to ensure this): The
+pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a
+collective SHMEM routine. The target array on all PEs in the active set is ready to accept the
+results of the reduction.
+.PP
+Upon return from a reduction routine, the following are true for the local PE: The target array
+is updated. The values in the pSync array are restored to the original values.
+.SH NOTES
+
+The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&.
+.PP
+All SHMEM reduction routines reset the values in pSync before they return, so a particular
+pSync buffer need only be initialized the first time it is used.
+.PP
+You must ensure that the pSync array is not being updated on any PE in the active set while
+any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the
+following situations: If the pSync array is initialized at run time, some type of
+synchronization is needed to ensure that all PEs in the working set have initialized pSync
+before any of them enter a SHMEM routine called with the pSync synchronization array. A
+pSync or pWrk array can be reused in a subsequent reduction routine call only if none
+of the PEs in the active set are still processing a prior reduction routine call that used the
+same pSync or pWrk arrays. In general, this can be assured only by doing some
+type of synchronization. However, in the special case of reduction routines being called with
+the same active set, you can allocate two pSync and pWrk arrays and alternate between them
+on successive calls.
+.SH EXAMPLES
+
+\fBExample 1:\fP
+This Fortran example statically initializes the pSync array and finds the
+sum of the real variable FOO across all even PEs.
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE)
+DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
+PARAMETER (NR=1)
+REAL FOO, FOOSUM, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE))
+COMMON /COM/ FOO, FOOSUM, PWRK
+INTRINSIC MY_PE
+
+IF ( MOD(MY_PE(),2) .EQ. 0) THEN
+  CALL SHMEM_REAL4_SUM_TO_ALL(FOOSUM, FOO, NR, 0, 1, N$PES/2,
+  & PWRK, PSYNC)
+  PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOSUM
+ENDIF
+.Ve
+\fBExample 2:\fP
+Consider the following C/C++ call:
+.Vb
+shmem_int_sum_to_all( target, source, 3, 0, 0, 8, pwrk, psync );
+.Ve
+The preceding call is more efficient than, but semantically equivalent to, the combination of the
+following calls:
+.Vb
+shmem_int_sum_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8,
+  pwrk1, psync1);
+shmem_int_sum_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8,
+  pwrk2, psync2);
+shmem_int_sum_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8,
+  pwrk1, psync1);
+.Ve
+Note that two sets of pWrk and pSync arrays are used alternately because no
+synchronization is done between calls.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_short_wait.3in b/oshmem/shmem/man/man3/shmem_short_wait.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_wait.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmem_short_wait_until.3in b/oshmem/shmem/man/man3/shmem_short_wait_until.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_wait_until.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmem_short_xor_to_all.3in b/oshmem/shmem/man/man3/shmem_short_xor_to_all.3in
new file mode 100644
index 0000000000..7d02702a87
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_short_xor_to_all.3in
@@ -0,0 +1,215 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_XOR" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_comp4_xor_to_all\fP(3),
+\fIshmem_int_xor_to_all\fP(3),
+\fIshmem_int4_xor_to_all\fP(3),
+\fIshmem_int8_xor_to_all\fP(3),
+\fIshmem_long_xor_to_all\fP(3),
+\fIshmem_longlong_xor_to_all\fP(3),
+\fIshmem_short_xor_to_all\fP(3)
+\- Performs a bitwise XOR operation on symmetric
+arrays over the active set of PEs.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_int_xor_to_all(int *target, int *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  int *pWrk, long *pSync);
+
+void shmem_long_xor_to_all(long *target, long *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  long *pWrk, long *pSync);
+
+void shmem_longlong_xor_to_all(long long *target,
+  long long *source, int nreduce, int PE_start, int logPE_stride,
+  int PE_size, long long *pWrk, long *pSync);
+
+void shmem_short_xor_to_all(short *target, short *source,
+  int nreduce, int PE_start, int logPE_stride, int PE_size,
+  short *pWrk, long *pSync);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pSync(SHMEM_REDUCE_SYNC_SIZE)
+INTEGER nreduce, PE_start, logPE_stride, PE_size
+
+CALL SHMEM_COMP4_XOR_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT4_XOR_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+
+CALL SHMEM_INT8_XOR_TO_ALL(target, source, nreduce,
+& PE_start, logPE_stride, PE_size, pWrk, pSync)
+.Ve
+.SH DESCRIPTION
+
+The shared memory (SHMEM) reduction routines compute one or more reductions across
+symmetric arrays on multiple virtual PEs. A reduction performs an associative binary
+operation across a set of values. For a list of other SHMEM reduction routines, see
+\fIintro_shmem\fP(3)\&.
+.PP
+As with all SHMEM collective routines, each of these routines assumes that only PEs in the
+active set call the routine. If a PE not in the active set calls a SHMEM collective routine,
+undefined behavior results.
+.PP
+The nreduce argument determines the number of separate reductions to perform. The source
+array on all PEs in the active set provides one element for each reduction. The results of the
+reductions are placed in the target array on all PEs in the active set. The active set is defined
+by the PE_start, logPE_stride, PE_size triplet.
+.PP
+The source and target arrays may be the same array, but they may not be overlapping arrays.
+.PP
+The arguments are as follows:
+.TP
+target
+A symmetric array of length nreduce elements to receive the results of the
+reduction operations.
+The data type of target varies with the version of the reduction routine being called and the
+language used. When calling from C/C++, refer to the SYNOPSIS section for data type
+information. When calling from Fortran, the target data types are as follows:
+.RS
+.TP
+\fBshmem_comp8_xor_to_all:\fP Complex, with an element size equal to two
+8\-byte real values
+.TP
+\fBshmem_comp4_xor_to_all:\fP Complex, with an element size equal to two
+4\-byte real values
+.TP
+\fBshmem_int8_xor_to_all:\fP Integer, with an element size of 8 bytes
+.TP
+\fBshmem_int4_xor_to_all:\fP Integer, with an element size of 4 bytes
+.TP
+\fBshmem_real8_xor_to_all:\fP Real, with an element size of 8 bytes
+.TP
+\fBshmem_real4_xor_to_all:\fP Real, with an element size of 4 bytes
+.RE
+.RS
+.PP
+.RE
+.TP
+source
+A symmetric array, of length nreduce elements, that contains one element for
+each separate reduction operation. The source argument must have the same data type as
+target.
+.TP
+nreduce
+The number of elements in the target and source arrays. nreduce must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+PE_start
+The lowest virtual PE number of the active set of PEs. PE_start must be of
+type integer. If you are using Fortran, it must be a default integer value.
+.TP
+logPE_stride
+The log (base 2) of the stride between consecutive virtual PE numbers in
+the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
+default integer value.
+.TP
+PE_size
+The number of PEs in the active set. PE_size must be of type integer. If you
+are using Fortran, it must be a default integer value.
+.TP
+pWrk
+A symmetric work array. The pWrk argument must have the same data type as
+target. In C/C++, this contains max(nreduce/2 + 1,
+_SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements. In Fortran, this contains
+max(nreduce/2 + 1, SHMEM_REDUCE_MIN_WRKDATA_SIZE) elements.
+.TP
+pSync
+A symmetric work array. In C/C++, pSync is of type long and size
+_SHMEM_REDUCE_SYNC_SIZE. In Fortran, pSync is of type integer and size
+SHMEM_REDUCE_SYNC_SIZE. If you are using Fortran, it must be a default integer value.
+Every element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in
+C/C++) or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
+the reduction routine.
+.PP
+The values of arguments nreduce, PE_start, logPE_stride, and PE_size must be equal on all
+PEs in the active set. The same target and source arrays, and the same pWrk and pSync
+work arrays, must be passed to all PEs in the active set.
+.PP
+Before any PE calls a reduction routine, you must ensure that the following conditions exist
+(synchronization via a barrier or some other method is often needed to ensure this): The
+pWrk and pSync arrays on all PEs in the active set are not still in use from a prior call to a
+collective SHMEM routine. The target array on all PEs in the active set is ready to accept the
+results of the reduction.
+.PP
+Upon return from a reduction routine, the following are true for the local PE: The target array
+is updated. The values in the pSync array are restored to the original values.
+.SH NOTES
+
+The terms collective, symmetric, and cache aligned are defined in \fIintro_shmem\fP(3)\&.
+.PP
+All SHMEM reduction routines reset the values in pSync before they return, so a particular
+pSync buffer need only be initialized the first time it is used.
+.PP
+You must ensure that the pSync array is not being updated on any PE in the active set while
+any of the PEs participate in processing of a SHMEM reduction routine. Be careful of the
+following situations: If the pSync array is initialized at run time, some type of
+synchronization is needed to ensure that all PEs in the working set have initialized pSync
+before any of them enter a SHMEM routine called with the pSync synchronization array. A
+pSync or pWrk array can be reused in a subsequent reduction routine call only if none of the
+PEs in the active set are still processing a prior reduction routine call that used the same
+pSync or pWrk arrays. In general, this can be assured only by doing some type of
+synchronization. However, in the special case of reduction routines being called with the
+same active set, you can allocate two pSync and pWrk arrays and alternate between them on
+successive calls.
+.SH EXAMPLES
+
+\fBExample 1:\fP
+This Fortran example statically initializes the pSync array and computes
+the exclusive OR of variable FOO across all even PEs.
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE)
+DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/
+PARAMETER (NR=1)
+REAL FOO, FOOXOR, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE))
+COMMON /COM/ FOO, FOOXOR, PWRK
+INTRINSIC MY_PE
+
+IF ( MOD(MY_PE(),2) .EQ. 0) THEN
+  CALL SHMEM_REAL8_XOR_TO_ALL(FOOXOR, FOO, NR, 0, 1, N$PES/2,
+  & PWRK, PSYNC)
+  PRINT *, 'Result on PE ', MY_PE(), ' is ', FOOXOR
+ENDIF
+.Ve
+\fBExample 2:\fP
+Consider the following C/C++ call:
+.Vb
+shmem_short_xor_to_all( target, source, 3, 0, 0, 8, pwrk, psync );
+.Ve
+The preceding call is more efficient than, but semantically equivalent to, the combination of the
+following calls:
+.Vb
+shmem_short_xor_to_all(&(target[0]), &(source[0]), 1, 0, 0, 8,
+  pwrk1, psync1);
+shmem_short_xor_to_all(&(target[1]), &(source[1]), 1, 0, 0, 8,
+  pwrk2, psync2);
+shmem_short_xor_to_all(&(target[2]), &(source[2]), 1, 0, 0, 8,
+  pwrk1, psync1);
+.Ve
+Note that two sets of pWrk and pSync arrays are used alternately because no synchronization
+is done between calls.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_swap.3in b/oshmem/shmem/man/man3/shmem_swap.3in
new file mode 100644
index 0000000000..bc8daafe0e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_swap.3in
@@ -0,0 +1,115 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_SWAP" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_double_swap\fP(3),
+\fIshmem_float_swap\fP(3),
+\fIshmem_int_swap\fP(3),
+\fIshmem_long_swap\fP(3),
+\fIshmem_swap\fP(3),
+\fIshmem_int4_swap\fP(3),
+\fIshmem_int8_swap\fP(3),
+\fIshmem_real4_swap\fP(3),
+\fIshmem_real8_swap\fP(3),
+\fIshmem_longlong_swap\fP(3)
+\- Performs an atomic swap to a remote data object
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+double shmem_double_swap(double *target, double value,
+  int pe);
+
+float shmem_float_swap(float *target, float value, int pe);
+
+int shmem_int_swap(int *target, int value, int pe);
+
+long shmem_long_swap(long *target, long value, int pe);
+
+long long shmem_longlong_swap(long long *target,
+  long long value, int pe);
+
+long shmem_swap(long *target, long value, int pe);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+INTEGER pe
+
+INTEGER SHMEM_SWAP
+ires = SHMEM_SWAP(target, value, pe)
+
+INTEGER(KIND=4) SHMEM_INT4_SWAP
+ires = SHMEM_INT4_SWAP(target, value, pe)
+
+INTEGER(KIND=8) SHMEM_INT8_SWAP
+ires = SHMEM_INT8_SWAP(target, value, pe)
+
+REAL(KIND=4) SHMEM_REAL4_SWAP
+res = SHMEM_REAL4_SWAP(target, value, pe)
+
+REAL(KIND=8) SHMEM_REAL8_SWAP
+res = SHMEM_REAL8_SWAP(target, value, pe)
+.Ve
+.SH DESCRIPTION
+
+The atomic swap routines write \fBvalue\fP
+to address target on PE \fBpe\fP,
+and return
+the previous contents of \fBtarget\fP
+in one atomic operation.
+.PP
+The arguments are as follows:
+.TP
+target
+The remotely accessible integer data object to be updated on the remote PE. If
+you are using C/C++, the type of target should match that implied in the SYNOPSIS section. If
+you are using Fortran, it must be of the following type:
+.RS
+.TP
+\fBSHMEM_SWAP:\fP Integer of default kind
+.TP
+\fBSHMEM_INT4_SWAP:\fP 4\-byte integer
+.TP
+\fBSHMEM_INT8_SWAP:\fP 8\-byte integer
+.TP
+\fBSHMEM_REAL4_SWAP:\fP 4\-byte real
+.TP
+\fBSHMEM_REAL8_SWAP:\fP 8\-byte real
+.RE
+.RS
+.PP
+.RE
+.TP
+value
+Value to be atomically written to the remote PE. value is the same type as target.
+.TP
+pe
+An integer that indicates the PE number on which target is to be updated. If you are
+using Fortran, it must be a default integer value.
+.PP
+.SH NOTES
+
+The term remotely accessible is defined in \fIintro_shmem\fP(3)\&.
+.SH RETURN VALUES
+
+The contents that had been at the target address on the remote PE prior to the swap are
+returned.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_test_lock.3in b/oshmem/shmem/man/man3/shmem_test_lock.3in
new file mode 100644
index 0000000000..49974c4f17
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_test_lock.3in
@@ -0,0 +1 @@
+.so man3/shmem_set_lock.3
\ No newline at end of file
diff --git a/oshmem/shmem/man/man3/shmem_udcflush.3in b/oshmem/shmem/man/man3/shmem_udcflush.3in
new file mode 100644
index 0000000000..3edeb74023
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_udcflush.3in
@@ -0,0 +1,94 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_CACHE" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_clear_cache_inv\fP(3),
+\fIshmem_clear_cache_line_inv\fP(3),
+\fIshmem_set_cache_inv\fP(3),
+\fIshmem_set_cache_line_inv\fP(3),
+\fIshmem_udcflush\fP(3),
+\fIshmem_udcflush_line\fP(3)
+\- Controls data cache utilities
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_clear_cache_inv(void);
+void shmem_clear_cache_line_inv(void *target);
+void shmem_set_cache_inv(void);
+void shmem_set_cache_line_inv(void *target);
+void shmem_udcflush(void);
+void shmem_udcflush_line(void *target);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+CALL SHMEM_CLEAR_CACHE_INV
+CALL SHMEM_CLEAR_CACHE_LINE_INV(target)
+CALL SHMEM_SET_CACHE_INV
+CALL SHMEM_SET_CACHE_LINE_INV(target)
+
+CALL SHMEM_UDCFLUSH
+CALL SHMEM_UDCFLUSH_LINE(target)
+.Ve
+.SH DESCRIPTION
+
+The following argument is passed to the cache line control routines:
+.TP
+target
+A data object that is local to the processing element (PE). target can be of
+any noncharacter type. If you are using Fortran, it can be of any kind.
+.PP
+\fBshmem_clear_cache_inv\fP
+disables automatic cache coherency mode previously
+enabled by shmem_set_cache_inv or shmem_set_cache_line_inv.
+.PP
+\fBshmem_clear_cache_line_inv\fP
+disables automatic cache coherency mode for the
+cache line associated with the address of \fBtarget\fP
+only.
+.PP
+\fBshmem_set_cache_inv\fP
+enables the OpenSHMEM API to automatically decide the
+best strategy for cache coherency.
+.PP
+\fBshmem_set_cache_line_inv\fP
+enables automatic cache coherency mode for the
+cache line associated with the address of \fBtarget\fP
+only.
+.PP
+\fBshmem_clear_cache_inv\fP
+disables automatic cache coherency mode previously
+enabled by shmem_set_cache_inv or shmem_set_cache_line_inv.
+.PP
+\fBshmem_udcflush\fP
+makes the entire user data cache coherent.
+.PP
+\fBshmem_udcflush_line\fP
+makes coherent the cache line that corresponds with
+the address specified by target.
+.PP
+.SH NOTES
+
+These routines have been retained for improved backward compatibility with legacy
+architectures.
+.PP
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_put\fP(3),
+\fIshmem_swap\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_udcflush_line.3in b/oshmem/shmem/man/man3/shmem_udcflush_line.3in
new file mode 100644
index 0000000000..4a6a361ef9
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_udcflush_line.3in
@@ -0,0 +1 @@
+.so man3/shmem_udcflush.3
diff --git a/oshmem/shmem/man/man3/shmem_wait.3in b/oshmem/shmem/man/man3/shmem_wait.3in
new file mode 100644
index 0000000000..f1c6aa5769
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_wait.3in
@@ -0,0 +1,205 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "SHMEM\\_WAIT" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIshmem_int_wait\fP(3),
+\fIshmem_int_wait_until\fP(3),
+\fIshmem_int4_wait\fP(3),
+\fIshmem_int4_wait_until\fP(3),
+\fIshmem_int8_wait\fP(3),
+\fIshmem_int8_wait_until\fP(3),
+\fIshmem_long_wait\fP(3),
+\fIshmem_long_wait_until\fP(3),
+\fIshmem_longlong_wait\fP(3),
+\fIshmem_longlong_wait_until\fP(3),
+\fIshmem_short_wait\fP(3),
+\fIshmem_short_wait_until\fP(3),
+\fIshmem_wait\fP(3),
+\fIshmem_wait_until\fP(3)
+\- Waits for a variable on the local processing element (PE) to change
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+#include <mpp/shmem.h>
+
+void shmem_int_wait(int *var, int value);
+
+void shmem_int_wait_until(int *var, int cond, int value);
+
+void shmem_long_wait(long *var, long value);
+
+void shmem_long_wait_until(long *var, int cond, long value);
+
+void shmem_longlong_wait(long long *var, long long value);
+
+void shmem_longlong_wait_until(long long *var, int cond,
+  long long value);
+
+void shmem_short_wait(short *var, short value);
+
+void shmem_short_wait_until(short *var, int cond,
+  short value);
+
+void shmem_wait(long *ivar, long cmp_value);
+
+void shmem_wait_until(long *ivar, int cmp, long value);
+.Ve
+Fortran:
+.Vb
+INCLUDE "mpp/shmem.fh"
+
+CALL SHMEM_INT4_WAIT(ivar, cmp_value)
+
+CALL SHMEM_INT4_WAIT_UNTIL(ivar, cmp, cmp_value)
+
+CALL SHMEM_INT8_WAIT(ivar, cmp_value)
+
+CALL SHMEM_INT8_WAIT_UNTIL(ivar, cmp, cmp_value)
+
+CALL SHMEM_WAIT(ivar, cmp_value)
+
+CALL SHMEM_WAIT_UNTIL(ivar, cmp, cmp_value)
+.Ve
+.SH DESCRIPTION
+
+shmem_wait and shmem_wait_until wait for \fBivar\fP
+to be changed by a remote write
+or atomic swap issued by a different processor. These routines can be used for
+point\-to\-point directed synchronization. A call to shmem_wait does not return until some other
+processor writes a value, not equal to cmp_value, into \fBivar\fP
+on the waiting
+processor. A call to shmem_wait_until does not return until some other processor changes
+\fBivar\fP
+to satisfy the condition implied by cmp and cmp_value. This mechanism is
+useful when a processor needs to tell another processor that it has completed some action.
+.PP
+The arguments are as follows:
+.TP
+target
+The remotely accessible integer data object to be updated on the remote PE. If
+you are using C/C++, the type of target should match that implied in the SYNOPSIS section.
+If you are using the Fortran compiler, it must be of type integer with an element size of 4
+bytes for SHMEM_INT4_ADD and 8 bytes for SHMEM_INT8_ADD.
+.TP
+value
+The value to be atomically added to target. If you are using C/C++, the type of
+value should match that implied in the SYNOPSIS section. If you are using Fortran, it must be
+of type integer with an element size of target.
+.TP
+pe
+An integer that indicates the PE number upon which target is to be updated. If you
+are using Fortran, it must be a default integer value.
+.TP
+ivar
+A remotely accessible integer variable that is being updated by another PE. If you
+are using C/C++, the type of ivar should match that implied in the SYNOPSIS section. If you
+are using Fortran, ivar must be a specific sized integer type according to
+the function being called, as follows:
+.RS
+.TP
+\fBshmem_wait, shmem_wait_until:\fP default INTEGER
+.TP
+\fBshmem_int4_wait, shmem_int4_wait_until:\fP INTEGER*4
+.TP
+\fBshmem_int8_wait, shmem_int8_wait_until:\fP INTEGER*8
+.RE
+.RS
+.PP
+.RE
+.TP
+cmp
+The compare operator that compares ivar with cmp_value. cmp must be of type
+integer. If you are using Fortran, it must be of default kind. If you are using C/C++, the type
+of cmp should match that implied in the SYNOPSIS section. The following cmp values are
+supported:
+.RS
+.TP
+SHMEM_CMP_EQ
+Equal
+.TP
+SHMEM_CMP_NE
+Not equal
+.TP
+SHMEM_CMP_GT
+Greater than
+.TP
+SHMEM_CMP_LE
+Less than or equal to
+.TP
+SHMEM_CMP_LT
+Less than
+.TP
+SHMEM_CMP_GE
+Greater than or equal to
+.RE
+.RS
+.PP
+.RE
+.TP
+cmp_value
+cmp_value must be of type integer. If you are using C/C++, the type of
+cmp_value should match that implied in the SYNOPSIS section. If you are using Fortran,
+cmp_value must be an integer of the same size and kind as ivar.
+The shmem_wait routines return when ivar is no longer equal to cmp_value.
+The shmem_wait_until routines return when the compare condition is true. The compare
+condition is defined by the ivar argument compared with the cmp_value using the
+comparison operator, cmp.
+.PP
+.SH EXAMPLES
+
+\fBExample 1:\fP
+The following call returns when variable ivar is not equal to 100:
+.Vb
+INTEGER*8 IVAR
+
+CALL SHMEM_INT8_WAIT(IVAR, INT8(100))
+.Ve
+\fBExample 2:\fP
+The following call to SHMEM_INT8_WAIT_UNTIL is equivalent to the
+call to SHMEM_INT8_WAIT in example 1:
+.Vb
+INTEGER*8 IVAR
+
+CALL SHMEM_INT8_WAIT_UNTIL(IVAR, SHMEM_CMP_NE, INT8(100))
+.Ve
+\fBExample 3:\fP
+The following C/C++ call waits until the sign bit in ivar is set by a
+transfer from a remote PE:
+.Vb
+int ivar;
+
+shmem_int_wait_until(&ivar, SHMEM_CMP_LT, 0);
+.Ve
+\fBExample 4:\fP
+The following Fortran example is in the context of a subroutine:
+.Vb
+SUBROUTINE EXAMPLE()
+  INTEGER FLAG_VAR
+  COMMON/FLAG/FLAG_VAR
+  . . .
+  FLAG_VAR = FLAG_VALUE ! initialize the event variable
+  . . .
+  IF (FLAG_VAR .EQ. FLAG_VALUE) THEN
+    CALL SHMEM_WAIT(FLAG_VAR, FLAG_VALUE)
+  ENDIF
+  FLAG_VAR = FLAG_VALUE ! reset the event variable for next time
+  . . .
+END
+.Ve
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_put\fP(3)
diff --git a/oshmem/shmem/man/man3/shmem_wait_until.3in b/oshmem/shmem/man/man3/shmem_wait_until.3in
new file mode 100644
index 0000000000..03267ffbc5
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmem_wait_until.3in
@@ -0,0 +1 @@
+.so man3/shmem_wait.3
diff --git a/oshmem/shmem/man/man3/shmemalign.3in b/oshmem/shmem/man/man3/shmemalign.3in
new file mode 100644
index 0000000000..63a8ff4e8e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shmemalign.3in
@@ -0,0 +1 @@
+.so man3/shmalloc.3
diff --git a/oshmem/shmem/man/man3/shrealloc.3in b/oshmem/shmem/man/man3/shrealloc.3in
new file mode 100644
index 0000000000..63a8ff4e8e
--- /dev/null
+++ b/oshmem/shmem/man/man3/shrealloc.3in
@@ -0,0 +1 @@
+.so man3/shmalloc.3
diff --git a/oshmem/shmem/man/man3/start_pes.3in b/oshmem/shmem/man/man3/start_pes.3in
new file mode 100644
index 0000000000..0901e38302
--- /dev/null
+++ b/oshmem/shmem/man/man3/start_pes.3in
@@ -0,0 +1,82 @@
+.\" -*- nroff -*-
+.\" Copyright (c) 2015      University of Houston.  All rights reserved.
+.\" Copyright (c) 2015      Mellanox Technologies, Inc.
+.\" $COPYRIGHT$
+.de Vb
+.ft CW
+.nf
+..
+.de Ve
+.ft R
+
+.fi
+..
+.TH "START\\_PES" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
+.SH NAME
+
+\fIstart_pes\fP(3)
+\- Initializes the SHMEM API and sets up the symmetric heap on the calling PE.
+.SH SYNOPSIS
+
+C or C++:
+.Vb
+void start_pes(int npes);
+.Ve
+Fortran:
+.Vb
+CALL START_PES(npes)
+.Ve
+.SH DESCRIPTION
+
+The start_pes routine should be the first statement in a SHMEM parallel program.
+.PP
+The start_pes routine accepts the following argument:
+.TP
+npes
+Unused. Should be set to 0.
+.PP
+This routine initializes the SHMEM API, therefore it must be called before calling any
+other SHMEM routine.
+This routine is responsible, inter alia, for setting up the symmetric heap on the calling PE, and
+the creation of the virtual PE numbers. Upon successful return from this routine, the calling PE
+will be able to communicate with and transfer data to other PEs.
+.PP
+Multiple calls to this function are not allowed.
+.PP
+For an overview of programming with SHMEM communication routines, example SHMEM
+programs, and instructions for compiling SHMEM programs, see the \fIintro_shmem\fP(3)
+man page.
+.SH EXAMPLES
+
+This is a simple program that calls \fIshmem_integer_put\fP(3):
+.Vb
+PROGRAM PUT
+  INCLUDE "mpp/shmem.fh"
+
+  INTEGER TARG, SRC, RECEIVER, BAR
+  COMMON /T/ TARG
+  PARAMETER (RECEIVER=1)
+
+  CALL START_PES(0)
+  IF (MY_PE() .EQ. 0) THEN
+    SRC = 33
+    CALL SHMEM_INTEGER_PUT(TARG, SRC, 1, RECEIVER)
+  ENDIF
+  CALL SHMEM_BARRIER_ALL ! SYNCHRONIZES SENDER AND RECEIVER
+  IF (MY_PE() .EQ. RECEIVER) THEN
+    PRINT *,'PE ', MY_PE(),' TARG=',TARG,' (expect 33)'
+  ENDIF
+END
+.Ve
+.SH NOTES
+
+If the start_pes call is not the first statement in a program, unexpected results may occur on
+some architectures.
+.SH SEE ALSO
+
+\fIintro_shmem\fP(3),
+\fIshmem_barrier\fP(3),
+\fIshmem_barrier_all\fP(3),
+\fIshmem_put\fP(3),
+\fImy_pe\fP(3),
+\fInum_pes\fP(3)