diff --git a/oshmem/shmem/c/Makefile.am b/oshmem/shmem/c/Makefile.am
index e8abf46e38..d68901a432 100644
--- a/oshmem/shmem/c/Makefile.am
+++ b/oshmem/shmem/c/Makefile.am
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013-2015 Mellanox Technologies, Inc.
+# Copyright (c) 2013-2016 Mellanox Technologies, Inc.
 #                         All rights reserved
 # Copyright (c) 2014 Cisco Systems, Inc.  All rights reserved.
 # $COPYRIGHT$
@@ -40,6 +40,8 @@ OSHMEM_API_SOURCES = \
 	shmem_wait.c \
 	shmem_iget.c \
 	shmem_iput.c \
+	shmem_get_nb.c \
+	shmem_put_nb.c \
 	shmem_udcflush.c \
 	shmem_udcflush_line.c \
 	shmem_set_cache_inv.c \
diff --git a/oshmem/shmem/c/profile/Makefile.am b/oshmem/shmem/c/profile/Makefile.am
index d4574b6845..68e2005f9c 100644
--- a/oshmem/shmem/c/profile/Makefile.am
+++ b/oshmem/shmem/c/profile/Makefile.am
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013      Mellanox Technologies, Inc.
+# Copyright (c) 2013-2016 Mellanox Technologies, Inc.
 #                         All rights reserved
 # Copyright (c) 2014 Cisco Systems, Inc.  All rights reserved.
 # $COPYRIGHT$
@@ -52,6 +52,8 @@ OSHMEM_API_SOURCES = \
 	pshmem_wait.c \
 	pshmem_iget.c \
 	pshmem_iput.c \
+	pshmem_get_nb.c \
+	pshmem_put_nb.c \
 	pshmem_udcflush.c \
 	pshmem_udcflush_line.c \
 	pshmem_set_cache_inv.c \
diff --git a/oshmem/shmem/c/profile/defines.h b/oshmem/shmem/c/profile/defines.h
index a1522b1912..76a8465c48 100644
--- a/oshmem/shmem/c/profile/defines.h
+++ b/oshmem/shmem/c/profile/defines.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2015 Mellanox Technologies, Inc.
+ * Copyright (c) 2013-2016 Mellanox Technologies, Inc.
  *                         All rights reserved.
  * $COPYRIGHT$
  *
@@ -107,6 +107,24 @@
 #define shmem_iput64                 pshmem_iput64
 #define shmem_iput128                pshmem_iput128
 
+/*
+ * Nonblocking data put routines
+ */
+#define shmem_char_put_nbi           pshmem_char_put_nbi
+#define shmem_short_put_nbi          pshmem_short_put_nbi
+#define shmem_int_put_nbi            pshmem_int_put_nbi
+#define shmem_long_put_nbi           pshmem_long_put_nbi
+#define shmem_float_put_nbi          pshmem_float_put_nbi
+#define shmem_double_put_nbi         pshmem_double_put_nbi
+#define shmem_longlong_put_nbi       pshmem_longlong_put_nbi
+#define shmem_longdouble_put_nbi     pshmem_longdouble_put_nbi
+#define shmem_put8_nbi               pshmem_put8_nbi
+#define shmem_put16_nbi              pshmem_put16_nbi
+#define shmem_put32_nbi              pshmem_put32_nbi
+#define shmem_put64_nbi              pshmem_put64_nbi
+#define shmem_put128_nbi             pshmem_put128_nbi
+#define shmem_putmem_nbi             pshmem_putmem_nbi
+
 /*
  * Elemental get routines
  */
@@ -154,6 +172,24 @@
 #define shmem_iget64                 pshmem_iget64
 #define shmem_iget128                pshmem_iget128
 
+/*
+ * Nonblocking data get routines
+ */
+#define shmem_char_get_nbi           pshmem_char_get_nbi
+#define shmem_short_get_nbi          pshmem_short_get_nbi
+#define shmem_int_get_nbi            pshmem_int_get_nbi
+#define shmem_long_get_nbi           pshmem_long_get_nbi
+#define shmem_float_get_nbi          pshmem_float_get_nbi
+#define shmem_double_get_nbi         pshmem_double_get_nbi
+#define shmem_longlong_get_nbi       pshmem_longlong_get_nbi
+#define shmem_longdouble_get_nbi     pshmem_longdouble_get_nbi
+#define shmem_get8_nbi               pshmem_get8_nbi
+#define shmem_get16_nbi              pshmem_get16_nbi
+#define shmem_get32_nbi              pshmem_get32_nbi
+#define shmem_get64_nbi              pshmem_get64_nbi
+#define shmem_get128_nbi             pshmem_get128_nbi
+#define shmem_getmem_nbi             pshmem_getmem_nbi
+
 /*
  * Atomic operations
  */
diff --git a/oshmem/shmem/c/shmem_get_nb.c b/oshmem/shmem/c/shmem_get_nb.c
new file mode 100644
index 0000000000..ba6e401b19
--- /dev/null
+++ b/oshmem/shmem/c/shmem_get_nb.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016      Mellanox Technologies, Inc.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include "oshmem_config.h"
+
+#include "oshmem/constants.h"
+#include "oshmem/include/shmem.h"
+
+#include "oshmem/runtime/runtime.h"
+
+#include "oshmem/mca/spml/spml.h"
+
+/*
+ * These routines start a nonblocking retrieval of data from a contiguous data object on a
+ * remote PE. The shmem_get_nbi() routines transfer nelems elements of the data object at
+ * address source on the remote PE (pe), to the data object at address target on the local PE.
+ * They return after posting the operation; it is complete only after a later shmem_quiet().
+ */
+/* Defines shmem<type_name>_get_nbi() for element type `type`: runs the RUNTIME_CHECK_*
+ * macros, converts nelems to a byte length and hands the request to the SPML get_nb. */
+#define SHMEM_TYPE_GET_NB(type_name, type)    \
+    void shmem##type_name##_get_nbi(type *target, const type *source, size_t nelems, int pe) \
+    {                                                               \
+        int status = OSHMEM_SUCCESS;                                \
+        /* Transfer length in bytes: element count times element size. */ \
+        const size_t nbytes = nelems * sizeof(type);                \
+                                                                    \
+        RUNTIME_CHECK_INIT();                                       \
+        RUNTIME_CHECK_PE(pe);                                       \
+        RUNTIME_CHECK_ADDR(source);                                 \
+                                                                    \
+        status = MCA_SPML_CALL(get_nb((void *)source,               \
+                                      nbytes,                       \
+                                      (void *)target,               \
+                                      pe,                           \
+                                      NULL));                       \
+        RUNTIME_CHECK_RC(status);                                   \
+    }
+
+#if OSHMEM_PROFILING /* profiling build: declare each shmem_*_get_nbi as a weak alias of pshmem_*_get_nbi */
+#include "oshmem/include/pshmem.h"
+#pragma weak shmem_char_get_nbi = pshmem_char_get_nbi
+#pragma weak shmem_short_get_nbi = pshmem_short_get_nbi
+#pragma weak shmem_int_get_nbi = pshmem_int_get_nbi
+#pragma weak shmem_long_get_nbi = pshmem_long_get_nbi
+#pragma weak shmem_longlong_get_nbi = pshmem_longlong_get_nbi
+#pragma weak shmem_float_get_nbi = pshmem_float_get_nbi
+#pragma weak shmem_double_get_nbi = pshmem_double_get_nbi
+#pragma weak shmem_longdouble_get_nbi = pshmem_longdouble_get_nbi
+#pragma weak shmem_get8_nbi = pshmem_get8_nbi
+#pragma weak shmem_get16_nbi = pshmem_get16_nbi
+#pragma weak shmem_get32_nbi = pshmem_get32_nbi
+#pragma weak shmem_get64_nbi = pshmem_get64_nbi
+#pragma weak shmem_get128_nbi = pshmem_get128_nbi
+#pragma weak shmem_getmem_nbi = pshmem_getmem_nbi
+#include "oshmem/shmem/c/profile/defines.h" /* #defines shmem_*_nbi to pshmem_*_nbi, so the definitions below get the pshmem_ names */
+#endif /* OSHMEM_PROFILING */
+
+SHMEM_TYPE_GET_NB(_char, char)               /* shmem_char_get_nbi */
+SHMEM_TYPE_GET_NB(_short, short)             /* shmem_short_get_nbi */
+SHMEM_TYPE_GET_NB(_int, int)                 /* shmem_int_get_nbi */
+SHMEM_TYPE_GET_NB(_long, long)               /* shmem_long_get_nbi */
+SHMEM_TYPE_GET_NB(_longlong, long long)      /* shmem_longlong_get_nbi */
+SHMEM_TYPE_GET_NB(_float, float)             /* shmem_float_get_nbi */
+SHMEM_TYPE_GET_NB(_double, double)           /* shmem_double_get_nbi */
+SHMEM_TYPE_GET_NB(_longdouble, long double)  /* shmem_longdouble_get_nbi */
+
+/* Defines <prefix><name>_nbi() over untyped memory: runs the RUNTIME_CHECK_* macros, scales
+ * nelems by element_size (bytes per element) and hands the request to the SPML get_nb. */
+#define SHMEM_TYPE_GETMEM_NB(name, element_size, prefix)    \
+    void prefix##name##_nbi(void *target, const void *source, size_t nelems, int pe) \
+    {                                                               \
+        int status = OSHMEM_SUCCESS;                                \
+        /* Transfer length in bytes: element count times element size. */ \
+        const size_t nbytes = nelems * (element_size);              \
+                                                                    \
+        RUNTIME_CHECK_INIT();                                       \
+        RUNTIME_CHECK_PE(pe);                                       \
+        RUNTIME_CHECK_ADDR(source);                                 \
+                                                                    \
+        status = MCA_SPML_CALL(get_nb((void *)source,               \
+                                      nbytes,                       \
+                                      (void *)target,               \
+                                      pe,                           \
+                                      NULL));                       \
+        RUNTIME_CHECK_RC(status);                                   \
+    }
+
+SHMEM_TYPE_GETMEM_NB(_get8, 1, shmem)      /* shmem_get8_nbi: 1-byte elements */
+SHMEM_TYPE_GETMEM_NB(_get16, 2, shmem)     /* shmem_get16_nbi: 2-byte elements */
+SHMEM_TYPE_GETMEM_NB(_get32, 4, shmem)     /* shmem_get32_nbi: 4-byte elements */
+SHMEM_TYPE_GETMEM_NB(_get64, 8, shmem)     /* shmem_get64_nbi: 8-byte elements */
+SHMEM_TYPE_GETMEM_NB(_get128, 16, shmem)   /* shmem_get128_nbi: 16-byte elements */
+SHMEM_TYPE_GETMEM_NB(_getmem, 1, shmem)    /* shmem_getmem_nbi: nelems is a byte count */
diff --git a/oshmem/shmem/c/shmem_put_nb.c b/oshmem/shmem/c/shmem_put_nb.c
new file mode 100644
index 0000000000..925e26e33f
--- /dev/null
+++ b/oshmem/shmem/c/shmem_put_nb.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016      Mellanox Technologies, Inc.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+#include "oshmem_config.h"
+
+#include "oshmem/constants.h"
+#include "oshmem/include/shmem.h"
+
+#include "oshmem/runtime/runtime.h"
+
+#include "oshmem/mca/spml/spml.h"
+
+/*
+ * The nonblocking put routines provide a method for copying data from a contiguous local data
+ * object to a data object on a specified PE.
+ * These routines transfer nelems elements of the data object at address source on the calling
+ * PE, to the data object at address target on the remote PE pe. These routines start the
+ * remote transfer and may return before the data is delivered to the remote PE. The delivery
+ * of data into the data object on the destination PE from different put calls may occur in any
+ * order. Because of this, two successive put operations may deliver data out of order unless a
+ * call to shmem_fence() is introduced between the two calls.
+ * The routines return after posting the operation. The operation is considered complete after a
+ * subsequent call to shmem_quiet. At the completion of shmem_quiet, the data has been copied
+ * into the dest array on the destination PE.
+ */
+/* Defines shmem<type_name>_put_nbi() for element type `type`: runs the RUNTIME_CHECK_*
+ * macros, converts len to a byte length and hands the request to the SPML put_nb. */
+#define SHMEM_TYPE_PUT_NB(type_name, type)    \
+    void shmem##type_name##_put_nbi(type *target, const type *source, size_t len, int pe) \
+    {                                                               \
+        int status = OSHMEM_SUCCESS;                                \
+        /* Transfer length in bytes: element count times element size. */ \
+        const size_t nbytes = len * sizeof(type);                   \
+                                                                    \
+        RUNTIME_CHECK_INIT();                                       \
+        RUNTIME_CHECK_PE(pe);                                       \
+        RUNTIME_CHECK_ADDR(target);                                 \
+                                                                    \
+        status = MCA_SPML_CALL(put_nb((void *)target,               \
+                                      nbytes,                       \
+                                      (void *)source,               \
+                                      pe,                           \
+                                      NULL));                       \
+        RUNTIME_CHECK_RC(status);                                   \
+    }
+
+#if OSHMEM_PROFILING /* profiling build: declare each shmem_*_put_nbi as a weak alias of pshmem_*_put_nbi */
+#include "oshmem/include/pshmem.h"
+#pragma weak shmem_char_put_nbi = pshmem_char_put_nbi
+#pragma weak shmem_short_put_nbi = pshmem_short_put_nbi
+#pragma weak shmem_int_put_nbi = pshmem_int_put_nbi
+#pragma weak shmem_long_put_nbi = pshmem_long_put_nbi
+#pragma weak shmem_longlong_put_nbi = pshmem_longlong_put_nbi
+#pragma weak shmem_float_put_nbi = pshmem_float_put_nbi
+#pragma weak shmem_double_put_nbi = pshmem_double_put_nbi
+#pragma weak shmem_longdouble_put_nbi = pshmem_longdouble_put_nbi
+#pragma weak shmem_put8_nbi = pshmem_put8_nbi
+#pragma weak shmem_put16_nbi = pshmem_put16_nbi
+#pragma weak shmem_put32_nbi = pshmem_put32_nbi
+#pragma weak shmem_put64_nbi = pshmem_put64_nbi
+#pragma weak shmem_put128_nbi = pshmem_put128_nbi
+#pragma weak shmem_putmem_nbi = pshmem_putmem_nbi
+#include "oshmem/shmem/c/profile/defines.h" /* #defines shmem_*_nbi to pshmem_*_nbi, so the definitions below get the pshmem_ names */
+#endif /* OSHMEM_PROFILING */
+
+SHMEM_TYPE_PUT_NB(_char, char)               /* shmem_char_put_nbi */
+SHMEM_TYPE_PUT_NB(_short, short)             /* shmem_short_put_nbi */
+SHMEM_TYPE_PUT_NB(_int, int)                 /* shmem_int_put_nbi */
+SHMEM_TYPE_PUT_NB(_long, long)               /* shmem_long_put_nbi */
+SHMEM_TYPE_PUT_NB(_longlong, long long)      /* shmem_longlong_put_nbi */
+SHMEM_TYPE_PUT_NB(_float, float)             /* shmem_float_put_nbi */
+SHMEM_TYPE_PUT_NB(_double, double)           /* shmem_double_put_nbi */
+SHMEM_TYPE_PUT_NB(_longdouble, long double)  /* shmem_longdouble_put_nbi */
+
+/* Defines <prefix><name>_nbi() over untyped memory: runs the RUNTIME_CHECK_* macros, scales
+ * nelems by element_size (bytes per element) and hands the request to the SPML put_nb. */
+#define SHMEM_TYPE_PUTMEM_NB(name, element_size, prefix)    \
+    void prefix##name##_nbi(void *target, const void *source, size_t nelems, int pe) \
+    {                                                               \
+        int status = OSHMEM_SUCCESS;                                \
+        /* Transfer length in bytes: element count times element size. */ \
+        const size_t nbytes = nelems * (element_size);              \
+                                                                    \
+        RUNTIME_CHECK_INIT();                                       \
+        RUNTIME_CHECK_PE(pe);                                       \
+        RUNTIME_CHECK_ADDR(target);                                 \
+                                                                    \
+        status = MCA_SPML_CALL(put_nb((void *)target,               \
+                                      nbytes,                       \
+                                      (void *)source,               \
+                                      pe,                           \
+                                      NULL));                       \
+        RUNTIME_CHECK_RC(status);                                   \
+    }
+
+SHMEM_TYPE_PUTMEM_NB(_put8, 1, shmem)      /* shmem_put8_nbi: 1-byte elements */
+SHMEM_TYPE_PUTMEM_NB(_put16, 2, shmem)     /* shmem_put16_nbi: 2-byte elements */
+SHMEM_TYPE_PUTMEM_NB(_put32, 4, shmem)     /* shmem_put32_nbi: 4-byte elements */
+SHMEM_TYPE_PUTMEM_NB(_put64, 8, shmem)     /* shmem_put64_nbi: 8-byte elements */
+SHMEM_TYPE_PUTMEM_NB(_put128, 16, shmem)   /* shmem_put128_nbi: 16-byte elements */
+SHMEM_TYPE_PUTMEM_NB(_putmem, 1, shmem)    /* shmem_putmem_nbi: nelems is a byte count */