From c1ee4fec23571256b666801af9082f428e93bbf9 Mon Sep 17 00:00:00 2001
From: Tim Woodall <twoodall@lanl.gov>
Date: Fri, 26 Mar 2004 14:15:20 +0000
Subject: [PATCH] - initial integration with datatypes - p2p mpi i/f functions
 - adding doxygen comments

This commit was SVN r976.
---
 include/mpi.h                          |   7 +-
 src/errhandler/errhandler.h            |   6 +-
 src/mca/pml/base/Makefile.am           |   4 +-
 src/mca/pml/base/pml_base_request.c    |  15 +-
 src/mca/pml/base/pml_base_request.h    |  56 ++---
 src/mca/pml/pml.h                      | 309 +++++++++++++++++++++++--
 src/mca/pml/teg/src/pml_ptl_array.h    |  57 ++++-
 src/mca/pml/teg/src/pml_teg.c          |   8 +-
 src/mca/pml/teg/src/pml_teg.h          |  36 +--
 src/mca/pml/teg/src/pml_teg_irecv.c    |  20 +-
 src/mca/pml/teg/src/pml_teg_isend.c    |  16 +-
 src/mca/pml/teg/src/pml_teg_module.c   |   9 +-
 src/mca/pml/teg/src/pml_teg_proc.h     |  30 ++-
 src/mca/pml/teg/src/pml_teg_progress.c |   2 +
 src/mca/pml/teg/src/pml_teg_recvreq.h  |  44 +++-
 src/mca/pml/teg/src/pml_teg_sendreq.c  |  10 +-
 src/mca/pml/teg/src/pml_teg_sendreq.h  |   8 +-
 src/mca/pml/teg/src/pml_teg_start.c    |  32 ++-
 src/mca/pml/teg/src/pml_teg_test.c     |  72 +++++-
 src/mca/pml/teg/src/pml_teg_wait.c     |  92 ++++----
 src/mca/ptl/base/base.h                |   4 +-
 src/mca/ptl/base/ptl_base_comm.c       |   1 +
 src/mca/ptl/base/ptl_base_comm.h       |  71 +++---
 src/mca/ptl/base/ptl_base_fragment.h   |  19 +-
 src/mca/ptl/base/ptl_base_header.h     |  80 +++----
 src/mca/ptl/base/ptl_base_match.c      |   8 +-
 src/mca/ptl/base/ptl_base_match.h      |  14 +-
 src/mca/ptl/base/ptl_base_recvfrag.h   |  91 ++++++--
 src/mca/ptl/base/ptl_base_recvreq.c    |   6 +-
 src/mca/ptl/base/ptl_base_recvreq.h    |  53 +++--
 src/mca/ptl/base/ptl_base_sendfrag.h   |  11 +-
 src/mca/ptl/base/ptl_base_sendreq.c    |   3 +-
 src/mca/ptl/base/ptl_base_sendreq.h    | 100 +++++---
 src/mca/ptl/ptl.h                      | 309 +++++++++++++++++++++----
 src/mca/ptl/tcp/src/ptl_tcp.c          |  51 ++--
 src/mca/ptl/tcp/src/ptl_tcp.h          | 191 ++++++++++-----
 src/mca/ptl/tcp/src/ptl_tcp_addr.h     |  16 +-
 src/mca/ptl/tcp/src/ptl_tcp_module.c   |  56 +++--
 src/mca/ptl/tcp/src/ptl_tcp_peer.c     |   5 +-
 src/mca/ptl/tcp/src/ptl_tcp_peer.h     |  32 +--
 src/mca/ptl/tcp/src/ptl_tcp_proc.c     |   3 +-
 src/mca/ptl/tcp/src/ptl_tcp_proc.h     |  27 +--
 src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c |  15 +-
 src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h |  26 ++-
 src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c |  67 ++++--
 src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h |  21 +-
 src/mca/ptl/tcp/src/ptl_tcp_sendreq.h  |  13 +-
 src/mpi/c/bsend.c                      |  18 +-
 src/mpi/c/bsend_init.c                 |  18 +-
 src/mpi/c/ibsend.c                     |  14 +-
 src/mpi/c/irecv.c                      |  18 +-
 src/mpi/c/irsend.c                     |  14 +-
 src/mpi/c/isend.c                      |  14 +-
 src/mpi/c/issend.c                     |  15 +-
 src/mpi/c/recv.c                       |  15 +-
 src/mpi/c/recv_init.c                  |  21 +-
 src/mpi/c/rsend.c                      |  15 +-
 src/mpi/c/rsend_init.c                 |  14 +-
 src/mpi/c/send.c                       |  12 +-
 src/mpi/c/send_init.c                  |  14 +-
 src/mpi/c/sendrecv.c                   |  61 ++++-
 src/mpi/c/sendrecv_replace.c           |   3 +-
 src/mpi/c/ssend.c                      |  14 +-
 src/mpi/c/ssend_init.c                 |  15 +-
 src/mpi/c/start.c                      |   2 +-
 src/mpi/c/startall.c                   |  21 +-
 src/mpi/c/test.c                       |  14 +-
 src/mpi/c/testall.c                    |   4 +-
 src/mpi/c/testany.c                    |  23 +-
 src/mpi/c/testsome.c                   |  32 ++-
 src/mpi/c/topo_test.c                  |   3 +-
 src/mpi/c/wait.c                       |  25 +-
 src/mpi/c/waitall.c                    |  12 +-
 src/mpi/c/waitany.c                    |  12 +-
 src/mpi/c/waitsome.c                   |   8 +-
 src/proc/proc.c                        |   4 +
 src/proc/proc.h                        |   3 +
 src/runtime/lam_mpi_init.c             |   1 +
 src/runtime/runtime.h                  |   3 +
 79 files changed, 1779 insertions(+), 809 deletions(-)

diff --git a/include/mpi.h b/include/mpi.h
index 9913ef74cc..2068a994cb 100644
--- a/include/mpi.h
+++ b/include/mpi.h
@@ -1271,13 +1271,12 @@ extern "C" {
                               int count);
   int PMPI_Testall(int count, MPI_Request array_of_requests[], int *flag, 
                   MPI_Status array_of_statuses[]);
-  int PMPI_Testany(int count, MPI_Request array_of_requests[], int *index, 
-                  MPI_Status *status);
+  int PMPI_Testany(int count, MPI_Request array_of_requests[], int *index, int *flag, MPI_Status *status);
   int PMPI_Test(MPI_Request *request, int *flag, MPI_Status *status);
   int PMPI_Test_cancelled(MPI_Status *status, int *flag);
   int PMPI_Testsome(int incount, MPI_Request array_of_requests[], 
-                   int *outcount, int array_of_indices, 
-                   MPI_Status array_of_statuses);
+                   int *outcount, int array_of_indices[], 
+                   MPI_Status array_of_statuses[]);
   int PMPI_Topo_test(MPI_Comm comm, int *status);
   MPI_Fint PMPI_Type_c2f(MPI_Datatype datatype);
   int PMPI_Type_commit(MPI_Datatype *type);
diff --git a/src/errhandler/errhandler.h b/src/errhandler/errhandler.h
index f92486abed..4709f482cd 100644
--- a/src/errhandler/errhandler.h
+++ b/src/errhandler/errhandler.h
@@ -110,7 +110,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table;
  * parallel invocation to LAM_ERRHANDLER_CHECK() and LAM_ERRHANDLER_RETURN().
  */
 #define LAM_ERRHANDLER_INVOKE(mpi_object, err_code, message) \
-  lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \
+  lam_errhandler_invoke((mpi_object) != NULL ? (mpi_object)->error_handler : NULL, (mpi_object), \
                         (err_code), (message));
 
 /**
@@ -129,7 +129,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table;
  */
 #define LAM_ERRHANDLER_CHECK(rc, mpi_object, err_code, message) \
   if (rc != LAM_SUCCESS) { \
-    lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \
+    lam_errhandler_invoke((mpi_object) != NULL ? (mpi_object)->error_handler : NULL, (mpi_object), \
                           (err_code), (message)); \
     return (err_code); \
   }
@@ -152,7 +152,7 @@ extern lam_pointer_array_t *lam_errhandler_f_to_c_table;
  */
 #define LAM_ERRHANDLER_RETURN(rc, mpi_object, err_code, message) \
   if (rc != LAM_SUCCESS) { \
-    lam_errhandler_invoke((mpi_object)->error_handler, (mpi_object), \
+    lam_errhandler_invoke((mpi_object != NULL) ? (mpi_object)->error_handler : NULL, (mpi_object), \
                           (err_code), (message)); \
     return (err_code); \
   } else { \
diff --git a/src/mca/pml/base/Makefile.am b/src/mca/pml/base/Makefile.am
index 9a89525138..aff4132f59 100644
--- a/src/mca/pml/base/Makefile.am
+++ b/src/mca/pml/base/Makefile.am
@@ -8,7 +8,9 @@ noinst_LTLIBRARIES = libmca_pml_base.la
 
 # For VPATH builds, have to specify where static-modules.h will be found
 
-AM_CPPFLAGS = -I$(top_builddir)/src
+AM_CPPFLAGS = \
+	-pedantic \
+	-I$(top_builddir)/src
 
 # Source code files
 
diff --git a/src/mca/pml/base/pml_base_request.c b/src/mca/pml/base/pml_base_request.c
index 1207c9e3f0..d5c4bd4c67 100644
--- a/src/mca/pml/base/pml_base_request.c
+++ b/src/mca/pml/base/pml_base_request.c
@@ -5,6 +5,14 @@
 
 #include "mca/pml/base/pml_base_request.h"
 
+static void mca_pml_base_request_construct(mca_pml_base_request_t* req)
+{
+}
+
+static void mca_pml_base_request_destruct(mca_pml_base_request_t* req)
+{
+}
+
 lam_class_t mca_pml_base_request_t_class = { 
     "mca_pml_base_request_t", 
     OBJ_CLASS(lam_request_t),
@@ -12,11 +20,4 @@ lam_class_t mca_pml_base_request_t_class = {
     (lam_destruct_t) mca_pml_base_request_destruct 
 };
                                                                                                  
-void mca_pml_base_request_construct(mca_pml_base_request_t* req)
-{
-}
-
-void mca_pml_base_request_destruct(mca_pml_base_request_t* req)
-{
-}
 
diff --git a/src/mca/pml/base/pml_base_request.h b/src/mca/pml/base/pml_base_request.h
index 467fe9cd66..31dabed524 100644
--- a/src/mca/pml/base/pml_base_request.h
+++ b/src/mca/pml/base/pml_base_request.h
@@ -1,8 +1,9 @@
 /*
  * $HEADER$
  */
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-
+/**
+ * @file
+ */
 #ifndef MCA_PML_BASE_REQUEST_H
 #define MCA_PML_BASE_REQUEST_H
 
@@ -13,7 +14,9 @@
 
 extern lam_class_t mca_pml_base_request_t_class;
 
-/* request type */
+/**
+ * Type of request.
+ */
 typedef enum {
     MCA_PML_REQUEST_NULL,
     MCA_PML_REQUEST_SEND,
@@ -21,40 +24,27 @@ typedef enum {
 } mca_pml_base_request_type_t;
 
 
-/* MPI pml (point-to-point) request */
+/**
+ *  Base type for PML P2P requests 
+ */
 struct mca_pml_base_request_t {
-    /* base request */
-    lam_request_t super;
-    /* pointer to application buffer */
-    void *req_addr;
-    /* length of application buffer */
-    size_t req_length;
-    /* peer process - rank w/in this communicator */
-    int32_t req_peer;
-    /* user defined tag */
-    int32_t req_tag;
-    /* communicator pointer */
-    lam_communicator_t *req_comm;
-    /* pointer to data type */
-    lam_datatype_t *req_datatype;
-    /* MPI request type - used for test */
-    mca_pml_base_request_type_t req_type;
-    /* completion status */
-    lam_status_public_t req_status;
-    /* flag indicating if the this is a persistent request */
-    bool req_persistent;
-    /* flag indicating if MPI is done with this request */
-    volatile bool req_mpi_done;
-    /* flag indicating if the pt-2-pt layer is done with this request */
-    volatile bool req_pml_done;
-    /* flag indicating if the user has freed this request */
-    volatile bool req_free_called;
+    lam_request_t super;                  /**< base request */
+    void *req_addr;                       /**< pointer to application buffer */
+    size_t req_count;                     /**< count of user datatype elements */
+    int32_t req_peer;                     /**< peer process - rank w/in this communicator */
+    int32_t req_tag;                      /**< user defined tag */
+    lam_communicator_t *req_comm;         /**< communicator pointer */
+    lam_proc_t* req_proc;                 /**< peer process */
+    lam_datatype_t *req_datatype;         /**< pointer to data type */
+    mca_pml_base_request_type_t req_type; /**< MPI request type - used for test */
+    lam_status_public_t req_status;       /**< completion status */
+    bool req_persistent;                  /**< flag indicating if the this is a persistent request */
+    volatile bool req_mpi_done;           /**< flag indicating if MPI is done with this request */
+    volatile bool req_pml_done;           /**< flag indicating if the pt-2-pt layer is done with this request */
+    volatile bool req_free_called;        /**< flag indicating if the user has freed this request */
 };
 typedef struct mca_pml_base_request_t mca_pml_base_request_t;
 
 
-void mca_pml_base_request_construct(mca_pml_base_request_t*);
-void mca_pml_base_request_destruct(mca_pml_base_request_t*);
-
 #endif
 
diff --git a/src/mca/pml/pml.h b/src/mca/pml/pml.h
index f12805dbc4..ac7fa2f39b 100644
--- a/src/mca/pml/pml.h
+++ b/src/mca/pml/pml.h
@@ -1,7 +1,48 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ * 
+ * P2P Management Layer (PML)
+ *
+ * An MCA component type that provides the P2P interface functionality required 
+ * by the MPI layer. The PML is a relatively thin layer that primarily provides 
+ * for the fragmentation and scheduling of messages over multiple transports 
+ * (instances of the P2P Transport Layer (PTL) MCA component type) as depicted below:
+ *
+ *   ------------------------------------
+ *   |                MPI               |
+ *   ------------------------------------
+ *   |                PML               |
+ *   ------------------------------------
+ *   | PTL (TCP) | PTL (SM) | PTL (...) |
+ *   ------------------------------------
+ *
+ * A single PML module is selected by the MCA framework during library
+ * initialization. Initially, all available PMLs are loaded (potentially 
+ * as shared libraries) and their module open and init functions called.
+ * The MCA framework selects the module returning the highest priority and
+ * closes/unloads any other PML modules that may have been opened.
+ *
+ * After the PML is selected, the MCA framework loads and initalize
+ * all available PTLs. The PML is notified of the selected PTLs via the
+ * the mca_pml_base_add_ptls_fn_t downcall from the MCA.
+ * 
+ * After all of the MCA components are initialized, the MPI/RTE will make 
+ * downcalls into the PML to provide the initial list of processes 
+ * (lam_proc_t instances), and notification of changes (add/delete).
+ * 
+ * The PML module must select the set of PTL modules that are to be used
+ * to reach a given destination. These should be cached on a PML specific
+ * data structure that is hung off the lam_proc_t.
+ *
+ * The PML should then apply a scheduling algorithm (round-robin,
+ * weighted distribution, etc), to schedule the delivery of messages
+ * over the available PTLs.
+ *
+ */
+                                                                                         
 #ifndef MCA_PML_H
 #define MCA_PML_H
 
@@ -68,27 +109,114 @@ typedef struct mca_pml_base_module_1_0_0_t mca_pml_base_module_1_0_0_t;
 typedef mca_pml_base_module_1_0_0_t mca_pml_base_module_t;
 
 
-/*
- * PML instance interface functions and datatype
+/**
+ * MCA management functions.
  */
 
-typedef int (*mca_pml_base_add_comm_fn_t)(struct lam_communicator_t*);
-typedef int (*mca_pml_base_del_comm_fn_t)(struct lam_communicator_t*);
-typedef int (*mca_pml_base_add_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs);
-typedef int (*mca_pml_base_del_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs);
-typedef int (*mca_pml_base_add_ptls_fn_t)(lam_list_t *ptls);
-typedef int (*mca_pml_base_progress_fn_t)(void);
 
+/**
+ * Downcall from MPI/RTE layer when new processes are created.
+ *
+ * @param  procs   Array of new processes
+ * @param  nprocs  Size of process array
+ * @return         LAM_SUCCESS or failure status.
+ *
+ * Provides a notification to the PML that new processes have been
+ * created, and provides the PML the opportunity to cache data
+ * (e.g. list of PTLs to use) on the lam_proc_t data structure.
+ */
+typedef int (*mca_pml_base_add_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs);
+
+
+/**
+ * Downcall from MPI/RTE layer when processes are terminated.
+ *
+ * @param  procs   Array of processes
+ * @param  nprocs  Size of process array
+ * @return         LAM_SUCCESS or failure status.
+ *
+ * Provides a notification to the PML that processes have 
+ * gone away, and provides the PML the opportunity to cleanup
+ * any data cached on the lam_proc_t data structure.
+ */
+typedef int (*mca_pml_base_del_procs_fn_t)(struct lam_proc_t **procs, size_t nprocs);
+
+
+/**
+ * Downcall from MCA layer after all PTLs have been loaded/selected.
+ *
+ * @param  ptls    List of selected PTLs
+ * @return         LAM_SUCCESS or failure status.
+ *
+ * Provides a notification to the PML that processes have 
+ * gone away, and provides the PML the opportunity to cleanup
+ * any data cached on the lam_proc_t data structure.
+ */
+typedef int (*mca_pml_base_add_ptls_fn_t)(lam_list_t *ptls);
+
+
+/**
+ * MPI Interface Functions
+ */
+
+
+/**
+ * Downcall from MPI layer when a new communicator is created.
+ *
+ * @param comm  Communicator
+ * @return      LAM_SUCCESS or failure status.
+ *
+ * Provides the PML the opportunity to initialize/cache a data structure
+ * on the communicator.
+ */
+typedef int (*mca_pml_base_add_comm_fn_t)(struct lam_communicator_t* comm);
+
+
+/**
+ * Downcall from MPI layer when a communicator is destroyed.
+ *
+ * @param comm  Communicator
+ * @return      LAM_SUCCESS or failure status.
+ *
+ * Provides the PML the opportunity to cleanup any datastructures
+ * associated with the communicator.
+ */
+typedef int (*mca_pml_base_del_comm_fn_t)(struct lam_communicator_t* comm);
+
+/**
+ *  Initialize a persistent receive request. 
+ *
+ *  @param buf (IN)         User buffer.
+ *  @param count (IN)       Number of elements of the specified datatype.
+ *  @param datatype (IN)    User defined datatype.
+ *  @param src (IN)         Source rank w/in communicator.
+ *  @param tag (IN)         User defined tag.
+ *  @param comm (IN)        Communicator.
+ *  @param request (OUT)    Request handle.
+ *  @return                 LAM_SUCCESS or failure status.
+ */
 typedef int (*mca_pml_base_irecv_init_fn_t)(
-    void *buf,
-    size_t count,
-    lam_datatype_t *datatype,
+    void *buf,                           
+    size_t count,                         
+    lam_datatype_t *datatype,              
     int src,
-    int tag,
+    int tag,                                
     struct lam_communicator_t* comm,
-    struct lam_request_t **request
+    struct lam_request_t **request           
 );
 
+/**
+ *  Post a receive request. 
+ *
+ *  @param buf (IN)         User buffer.
+ *  @param count (IN)       Number of elements of the specified datatype.
+ *  @param datatype (IN)    User defined datatype.
+ *  @param src (IN)         Source rank w/in communicator.
+ *  @param tag (IN)         User defined tag.
+ *  @param comm (IN)        Communicator.
+ *  @param request (OUT)    Request handle.
+ *  @return                 LAM_SUCCESS or failure status.
+ */
 typedef int (*mca_pml_base_irecv_fn_t)(
     void *buf,
     size_t count,
@@ -99,6 +227,18 @@ typedef int (*mca_pml_base_irecv_fn_t)(
     struct lam_request_t **request
 );
 
+/**
+ *  Post a receive and wait for completion. 
+ *
+ *  @param buf (IN)         User buffer
+ *  @param count (IN)       Number of elements of the specified datatype
+ *  @param datatype (IN)    User defined datatype
+ *  @param src (IN)         Source rank w/in communicator
+ *  @param tag (IN)         User defined tag
+ *  @param comm (IN)        Communicator
+ *  @param status (OUT)     Completion status
+ *  @return                 LAM_SUCCESS or failure status.
+ */
 typedef int (*mca_pml_base_recv_fn_t)(
     void *buf,
     size_t count,
@@ -109,6 +249,19 @@ typedef int (*mca_pml_base_recv_fn_t)(
     lam_status_public_t* status
 );
 
+/**
+ *  Initialize a persistent send request. 
+ *
+ *  @param buf (IN)         User buffer.
+ *  @param count (IN)       Number of elements of the specified datatype.
+ *  @param datatype (IN)    User defined datatype.
+ *  @param dst (IN)         Peer rank w/in communicator.
+ *  @param tag (IN)         User defined tag.
+ *  @param mode (IN)        Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
+ *  @param comm (IN)        Communicator.
+ *  @param request (OUT)    Request handle.
+ *  @return                 LAM_SUCCESS or failure status.
+ */
 typedef int (*mca_pml_base_isend_init_fn_t)(
     void *buf,
     size_t count,
@@ -120,6 +273,20 @@ typedef int (*mca_pml_base_isend_init_fn_t)(
     struct lam_request_t **request
 );
 
+
+/**
+ *  Post a send request. 
+ *
+ *  @param buf (IN)         User buffer.
+ *  @param count (IN)       Number of elements of the specified datatype.
+ *  @param datatype (IN)    User defined datatype.
+ *  @param dst (IN)         Peer rank w/in communicator.
+ *  @param tag (IN)         User defined tag.
+ *  @param mode (IN)        Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
+ *  @param comm (IN)        Communicator.
+ *  @param request (OUT)    Request handle.
+ *  @return                 LAM_SUCCESS or failure status.
+ */
 typedef int (*mca_pml_base_isend_fn_t)(
     void *buf,
     size_t count,
@@ -131,6 +298,19 @@ typedef int (*mca_pml_base_isend_fn_t)(
     struct lam_request_t **request
 );
 
+
+/**
+ *  Post a send request and wait for completion.
+ *
+ *  @param buf (IN)         User buffer.
+ *  @param count (IN)       Number of elements of the specified datatype.
+ *  @param datatype (IN)    User defined datatype.
+ *  @param dst (IN)         Peer rank w/in communicator.
+ *  @param tag (IN)         User defined tag.
+ *  @param mode (IN)        Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
+ *  @param comm (IN)        Communicator.
+ *  @return                 LAM_SUCCESS or failure status.
+ */
 typedef int (*mca_pml_base_send_fn_t)(
     void *buf,
     size_t count,
@@ -141,16 +321,72 @@ typedef int (*mca_pml_base_send_fn_t)(
     struct lam_communicator_t* comm
 );
 
+/**
+ * Initiate one or more persistent requests.
+ *
+ * @param count    Number of requests
+ * @param request  Array of persistent requests
+ * @return         LAM_SUCCESS or failure status.
+ */
 typedef int (*mca_pml_base_start_fn_t)(
-    lam_request_t** request
+    size_t count,
+    lam_request_t** requests
 );
 
+/**
+ * Non-blocking test for request completion.
+ *
+ * @param count (IN)     Number of requests
+ * @param request (IN)   Array of requests
+ * @param index (OUT)    Index of first completed request.
+ * @param complete (OUT) Flag indicating if index is valid (a request completed).
+ * @param status (OUT)   Status of completed request.
+ * @return               LAM_SUCCESS or failure status.
+ *
+ * Note that upon completion, the request is freed, and the
+ * request handle at index set to NULL.
+ */
 typedef int (*mca_pml_base_test_fn_t)(
-    lam_request_t** request,
+    size_t count,
+    lam_request_t** requests,
+    int *index,
     int *completed,
     lam_status_public_t* status
 );
 
+
+/**
+ * Non-blocking test for request completion.
+ *
+ * @param count (IN)      Number of requests
+ * @param requests (IN)   Array of requests
+ * @param completed (OUT) Flag indicating wether all requests completed.
+ * @param statuses (OUT)  Array of completion statuses.
+ * @return                LAM_SUCCESS or failure status.
+ *
+ * This routine returns completed==true if all requests have completed.
+ * The statuses parameter is only updated if all requests completed. Likewise, 
+ * the requests array is not modified (no requests freed), unless all requests 
+ * have completed.
+ */
+typedef int (*mca_pml_base_test_all_fn_t)(
+    size_t count,
+    lam_request_t** requests,
+    int *completed,
+    lam_status_public_t* statuses
+);
+
+
+/**
+ * Wait (blocking-mode) for one of N requests to complete.
+ *
+ * @param count (IN)      Number of requests
+ * @param requests (IN)   Array of requests
+ * @param index (OUT)     Index into request array of completed request.
+ * @param status (OUT)    Status of completed request.
+ * @return                LAM_SUCCESS or failure status.
+ *
+ */
 typedef int (*mca_pml_base_wait_fn_t)(
     size_t count,
     lam_request_t** request,
@@ -158,37 +394,61 @@ typedef int (*mca_pml_base_wait_fn_t)(
     lam_status_public_t* status
 );
 
+
+/**
+ * Wait (blocking-mode) for all of N requests to complete.
+ *
+ * @param count (IN)      Number of requests
+ * @param requests (IN)   Array of requests
+ * @param statuses (OUT)  Array of completion statuses.
+ * @return                LAM_SUCCESS or failure status.
+ *
+ */
 typedef int (*mca_pml_base_wait_all_fn_t)(
-    size_t count,                 /* count of request/status arrays */
-    lam_request_t** request,      /* array of requests */
-    lam_status_public_t *status   /* array of statuses */
+    size_t count,                
+    lam_request_t** request,    
+    lam_status_public_t *status
 );
 
+
+/**
+ * Release resources held by a persistent mode request.
+ *
+ * @param request (IN)    Request
+ * @return                LAM_SUCCESS or failure status.
+ *
+ */
 typedef int (*mca_pml_base_free_fn_t)(
     lam_request_t** request
 );
 
+
+/**
+ * A special NULL request handle.
+ *
+ * @param request (OUT)   Request
+ * @return                LAM_SUCCESS or failure status.
+ *
+ */
 typedef int (*mca_pml_base_null_fn_t)(
     lam_request_t** request
 );
 
 
-
 /**
- *  PML instance interface functions.
+ *  PML instance.
  */
 
 struct mca_pml_1_0_0_t {
 
     /* downcalls from MCA to PML */
-    mca_pml_base_add_comm_fn_t     pml_add_comm;
-    mca_pml_base_del_comm_fn_t     pml_del_comm;
     mca_pml_base_add_procs_fn_t    pml_add_procs;
     mca_pml_base_del_procs_fn_t    pml_del_procs;
     mca_pml_base_add_ptls_fn_t     pml_add_ptls;
-    mca_pml_base_progress_fn_t     pml_progress;
 
     /* downcalls from MPI to PML */
+    mca_pml_base_add_comm_fn_t     pml_add_comm;
+    mca_pml_base_del_comm_fn_t     pml_del_comm;
     mca_pml_base_irecv_init_fn_t   pml_irecv_init;
     mca_pml_base_irecv_fn_t        pml_irecv;
     mca_pml_base_recv_fn_t         pml_recv;
@@ -197,6 +457,7 @@ struct mca_pml_1_0_0_t {
     mca_pml_base_send_fn_t         pml_send;
     mca_pml_base_start_fn_t        pml_start;
     mca_pml_base_test_fn_t         pml_test;
+    mca_pml_base_test_all_fn_t     pml_test_all;
     mca_pml_base_wait_fn_t         pml_wait;
     mca_pml_base_wait_all_fn_t     pml_wait_all;
     mca_pml_base_free_fn_t         pml_free;
diff --git a/src/mca/pml/teg/src/pml_ptl_array.h b/src/mca/pml/teg/src/pml_ptl_array.h
index 3f077ba6a2..7c35e2c713 100644
--- a/src/mca/pml/teg/src/pml_ptl_array.h
+++ b/src/mca/pml/teg/src/pml_ptl_array.h
@@ -1,7 +1,9 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef LAM_PTL_ARRAY_H
 #define LAM_PTL_ARRAY_H
 
@@ -10,24 +12,40 @@
 
 extern lam_class_t mca_pml_teg_ptl_array_t_class;
 
+/**
+ * A data structure associated with a lam_proc_t that caches
+ * addressing/scheduling attributes for a specific PTL instance
+ * that can be used to reach the process.
+ */
 struct mca_ptl_proc_t {
-    double ptl_weight;               /* PTL weight for scheduling */
-    struct mca_ptl_base_peer_t* ptl_peer; /* PTL addressing info */
-    mca_ptl_t *ptl;                  /* PTL implementation */
+    double ptl_weight;                    /**< PTL weight for scheduling */
+    struct mca_ptl_base_peer_t* ptl_peer; /**< PTL addressing info */
+    mca_ptl_t *ptl;                       /**< PTL implementation */
 };
 typedef struct mca_ptl_proc_t mca_ptl_proc_t;
 
+/**
+ * A dynamically growable array of mca_ptl_proc_t instances.
+ * Maintains an index into the array that is used for round-robin
+ * scheduling across contents.
+ */
 struct mca_ptl_array_t {
     lam_object_t     super;
-    mca_ptl_proc_t*  ptl_procs;   /* array of ptl procs */
-    size_t           ptl_size;    /* number available */
-    size_t           ptl_reserve;
-    size_t           ptl_index;   /* last used index*/
+    mca_ptl_proc_t*  ptl_procs;   /**< array of ptl procs */
+    size_t           ptl_size;    /**< number available */
+    size_t           ptl_reserve; /**< size of allocated ptl_proc array */
+    size_t           ptl_index;   /**< last used index*/
 };
 typedef struct mca_ptl_array_t mca_ptl_array_t;
 typedef struct mca_ptl_array_t mca_pml_teg_ptl_array_t;
 
 
+/**
+ * If required, reallocate (grow) the array to the indicate size.
+ * 
+ * @param array (IN)
+ * @param size (IN)
+ */
 int mca_ptl_array_reserve(mca_ptl_array_t*, size_t);
 
 static inline size_t mca_ptl_array_get_size(mca_ptl_array_t* array)
@@ -35,6 +53,12 @@ static inline size_t mca_ptl_array_get_size(mca_ptl_array_t* array)
     return array->ptl_size;
 }
 
+/**
+ * Grow the array if required, and set the size.
+ * 
+ * @param array (IN)
+ * @param size (IN)
+ */
 static inline void mca_ptl_array_set_size(mca_ptl_array_t* array, size_t size)
 {
     if(array->ptl_size > array->ptl_reserve)
@@ -42,6 +66,11 @@ static inline void mca_ptl_array_set_size(mca_ptl_array_t* array, size_t size)
     array->ptl_size = size;
 }
 
+/**
+ * Grow the array size by one and return the item at that index.
+ * 
+ * @param array (IN)
+ */
 static inline mca_ptl_proc_t* mca_ptl_array_insert(mca_ptl_array_t* array)
 {
 #if LAM_ENABLE_DEBUG
@@ -54,6 +83,12 @@ static inline mca_ptl_proc_t* mca_ptl_array_insert(mca_ptl_array_t* array)
     return &array->ptl_procs[array->ptl_size++];
 }
 
+/**
+ * Return an array item at the specified index.
+ * 
+ * @param array (IN)
+ * @param index (IN)
+ */
 static inline mca_ptl_proc_t* mca_ptl_array_get_index(mca_ptl_array_t* array, size_t index)
 {
 #if LAM_ENABLE_DEBUG
@@ -66,6 +101,12 @@ static inline mca_ptl_proc_t* mca_ptl_array_get_index(mca_ptl_array_t* array, si
     return &array->ptl_procs[index];
 }
 
+/**
+ * Return the next LRU index in the array.
+ * 
+ * @param array (IN)
+ * @param index (IN)
+ */
 static inline mca_ptl_proc_t* mca_ptl_array_get_next(mca_ptl_array_t* array)
 {
     mca_ptl_proc_t* ptl_proc;
diff --git a/src/mca/pml/teg/src/pml_teg.c b/src/mca/pml/teg/src/pml_teg.c
index 658c093341..250bc4bd1f 100644
--- a/src/mca/pml/teg/src/pml_teg.c
+++ b/src/mca/pml/teg/src/pml_teg.c
@@ -19,12 +19,11 @@
 
 mca_pml_teg_t mca_pml_teg = {
     {
-    mca_pml_teg_add_comm,
-    mca_pml_teg_del_comm,
     mca_pml_teg_add_procs,
     mca_pml_teg_del_procs,
     mca_pml_teg_add_ptls,
-    mca_pml_teg_progress,
+    mca_pml_teg_add_comm,
+    mca_pml_teg_del_comm,
     mca_pml_teg_irecv_init,
     mca_pml_teg_irecv,
     mca_pml_teg_recv,
@@ -33,6 +32,7 @@ mca_pml_teg_t mca_pml_teg = {
     mca_pml_teg_send,
     mca_pml_teg_start,
     mca_pml_teg_test,
+    mca_pml_teg_test_all,
     mca_pml_teg_wait,
     mca_pml_teg_wait_all,
     mca_pml_teg_free,
@@ -279,7 +279,7 @@ int mca_pml_teg_module_fini(void)
 
 int mca_pml_teg_null(lam_request_t** request)
 {
-    *request = (lam_request_t*)&mca_pml_teg.teg_null;
+    *request = (lam_request_t*)&mca_pml_teg.teg_request_null;
     return LAM_SUCCESS;
 }
 
diff --git a/src/mca/pml/teg/src/pml_teg.h b/src/mca/pml/teg/src/pml_teg.h
index 34e7de5021..67f885b2d9 100644
--- a/src/mca/pml/teg/src/pml_teg.h
+++ b/src/mca/pml/teg/src/pml_teg.h
@@ -1,11 +1,9 @@
-/** @file 
- *
- *  
- */
-
 /*
  * $HEADER$
  */
+/**
+ *  @file 
+ */
 
 #ifndef MCA_PML_TEG_H
 #define MCA_PML_TEG_H
@@ -51,9 +49,7 @@ struct mca_pml_teg_t {
     lam_mutex_t teg_request_lock;
     lam_condition_t teg_request_cond;
     volatile int teg_request_waiting;
-
-    /* null request */
-    mca_pml_base_request_t teg_null;
+    mca_pml_base_request_t teg_request_null;
 };
 typedef struct mca_pml_teg_t mca_pml_teg_t; 
 
@@ -108,7 +104,7 @@ extern int mca_pml_teg_add_ptls(
 
 extern int mca_pml_teg_isend_init(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int dst,
     int tag,
@@ -119,7 +115,7 @@ extern int mca_pml_teg_isend_init(
 
 extern int mca_pml_teg_isend(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int dst,
     int tag,
@@ -130,7 +126,7 @@ extern int mca_pml_teg_isend(
 
 extern int mca_pml_teg_send(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int dst,
     int tag,
@@ -140,7 +136,7 @@ extern int mca_pml_teg_send(
 
 extern int mca_pml_teg_irecv_init(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int src,
     int tag,
@@ -150,7 +146,7 @@ extern int mca_pml_teg_irecv_init(
 
 extern int mca_pml_teg_irecv(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int src,
     int tag,
@@ -160,7 +156,7 @@ extern int mca_pml_teg_irecv(
 
 extern int mca_pml_teg_recv(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int src,
     int tag,
@@ -171,10 +167,20 @@ extern int mca_pml_teg_recv(
 extern int mca_pml_teg_progress(void);
 
 extern int mca_pml_teg_start(
-    lam_request_t** request
+    size_t count,
+    lam_request_t** requests
 );
 
 extern int mca_pml_teg_test(
+    size_t count,
+    lam_request_t** request,
+    int *index,
+    int *completed,
+    lam_status_public_t* status
+);
+
+extern int mca_pml_teg_test_all(
+    size_t count,
     lam_request_t** request,
     int *completed,
     lam_status_public_t* status
diff --git a/src/mca/pml/teg/src/pml_teg_irecv.c b/src/mca/pml/teg/src/pml_teg_irecv.c
index 2192bc069a..5033fda7f1 100644
--- a/src/mca/pml/teg/src/pml_teg_irecv.c
+++ b/src/mca/pml/teg/src/pml_teg_irecv.c
@@ -3,7 +3,7 @@
                                                                                                              
 int mca_pml_teg_irecv_init(
     void *addr,
-    size_t length,
+    size_t count,
     lam_datatype_t *datatype,
     int src,
     int tag,
@@ -18,7 +18,7 @@ int mca_pml_teg_irecv_init(
     mca_ptl_base_recv_request_init(
         recvreq,
         addr,
-        length,
+        count,
         datatype,
         src,
         tag,
@@ -31,7 +31,7 @@ int mca_pml_teg_irecv_init(
                                                                                                                               
 int mca_pml_teg_irecv(
     void *addr,
-    size_t length,
+    size_t count,
     lam_datatype_t *datatype,
     int src,
     int tag,
@@ -40,10 +40,6 @@ int mca_pml_teg_irecv(
 {
     int rc;
 
-#if 0
-    lam_output(0, "mca_pml_teg_irecv: src=%d tag=%d comm=%d\n", src, tag, comm->c_contextid);
-#endif
-    
     mca_ptl_base_recv_request_t *recvreq = mca_pml_teg_recv_request_alloc(&rc);
     if(NULL == recvreq)
         return rc;
@@ -51,7 +47,7 @@ int mca_pml_teg_irecv(
     mca_ptl_base_recv_request_init(
         recvreq,
         addr,
-        length,
+        count,
         datatype,
         src,
         tag,
@@ -69,16 +65,14 @@ int mca_pml_teg_irecv(
 
 int mca_pml_teg_recv(
     void *addr,
-    size_t length,
+    size_t count,
     lam_datatype_t *datatype,
     int src,
     int tag,
     struct lam_communicator_t* comm,
     lam_status_public_t* status)
 {
-    int rc;
-    int index;
-
+    int rc, index;
     mca_ptl_base_recv_request_t *recvreq = mca_pml_teg_recv_request_alloc(&rc);
     if(NULL == recvreq)
         return rc;
@@ -86,7 +80,7 @@ int mca_pml_teg_recv(
     mca_ptl_base_recv_request_init(
         recvreq,
         addr,
-        length,
+        count,
         datatype,
         src,
         tag,
diff --git a/src/mca/pml/teg/src/pml_teg_isend.c b/src/mca/pml/teg/src/pml_teg_isend.c
index c9da6b5e4f..e93bbed768 100644
--- a/src/mca/pml/teg/src/pml_teg_isend.c
+++ b/src/mca/pml/teg/src/pml_teg_isend.c
@@ -9,7 +9,7 @@
 
 int mca_pml_teg_isend_init(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int dst,
     int tag,
@@ -26,7 +26,7 @@ int mca_pml_teg_isend_init(
     mca_ptl_base_send_request_init(
         sendreq,
         buf,
-        size,
+        count,
         datatype,
         dst,
         tag,
@@ -42,7 +42,7 @@ int mca_pml_teg_isend_init(
 
 int mca_pml_teg_isend(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int dst,
     int tag,
@@ -58,7 +58,7 @@ int mca_pml_teg_isend(
     mca_ptl_base_send_request_init(
         sendreq,
         buf,
-        size,
+        count,
         datatype,
         dst,
         tag,
@@ -76,16 +76,14 @@ int mca_pml_teg_isend(
 
 int mca_pml_teg_send(
     void *buf,
-    size_t size,
+    size_t count,
     lam_datatype_t *datatype,
     int dst,
     int tag,
     mca_pml_base_send_mode_t sendmode,
     lam_communicator_t* comm)
 {
-    int rc;
-    int index;
-
+    int rc, index;
     mca_ptl_base_send_request_t* sendreq = mca_pml_teg_send_request_alloc(comm,dst,&rc);
     if(rc != LAM_SUCCESS)
         return rc;
@@ -93,7 +91,7 @@ int mca_pml_teg_send(
     mca_ptl_base_send_request_init(
         sendreq,
         buf,
-        size,
+        count,
         datatype,
         dst,
         tag,
diff --git a/src/mca/pml/teg/src/pml_teg_module.c b/src/mca/pml/teg/src/pml_teg_module.c
index f4a39f4908..184f65f553 100644
--- a/src/mca/pml/teg/src/pml_teg_module.c
+++ b/src/mca/pml/teg/src/pml_teg_module.c
@@ -59,7 +59,7 @@ static inline int mca_pml_teg_param_register_int(
 
 int mca_pml_teg_module_open(void)
 {
-    mca_pml_base_request_t* teg_null = &mca_pml_teg.teg_null;
+    mca_pml_base_request_t* teg_null = &mca_pml_teg.teg_request_null;
     OBJ_CONSTRUCT(&mca_pml_teg.teg_lock, lam_mutex_t);
     OBJ_CONSTRUCT(&mca_pml_teg.teg_recv_requests, lam_free_list_t);
     OBJ_CONSTRUCT(&mca_pml_teg.teg_procs, lam_list_t);
@@ -96,13 +96,6 @@ int mca_pml_teg_module_close(void)
 }
 
 
-static void* mca_pml_teg_thread(lam_object_t* thread)
-{
-    lam_event_dispatch();
-    return NULL;
-}
-
-
 mca_pml_t* mca_pml_teg_module_init(int* priority, 
                                    bool *allow_multi_user_threads,
                                    bool *have_hidden_threads)
diff --git a/src/mca/pml/teg/src/pml_teg_proc.h b/src/mca/pml/teg/src/pml_teg_proc.h
index 8b388d7852..f2636c3d02 100644
--- a/src/mca/pml/teg/src/pml_teg_proc.h
+++ b/src/mca/pml/teg/src/pml_teg_proc.h
@@ -1,7 +1,9 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PML_PROC_H
 #define MCA_PML_PROC_H
 
@@ -11,17 +13,16 @@
 #include "proc/proc.h"
 #include "pml_ptl_array.h"
 
-/*
+/**
  *  Structure associated w/ lam_proc_t that contains data specific
  *  to the PML. Note that this name is not PML specific.
  */
-
 struct mca_pml_proc_t {
    lam_list_item_t super;
-   lam_proc_t *proc_lam;
-   lam_mutex_t proc_lock;
-   mca_ptl_array_t proc_ptl_first;
-   mca_ptl_array_t proc_ptl_next;
+   lam_proc_t *proc_lam;             /**< back-pointer to lam_proc_t */
+   lam_mutex_t proc_lock;            /**< lock to protect against concurrent access */
+   mca_ptl_array_t proc_ptl_first;   /**< array of ptls to use for first fragments */
+   mca_ptl_array_t proc_ptl_next;    /**< array of ptls to use for remaining fragments */
 };
 typedef struct mca_pml_proc_t mca_pml_proc_t;
 
@@ -29,6 +30,13 @@ typedef struct mca_pml_proc_t mca_pml_proc_t;
 extern lam_class_t mca_pml_teg_proc_t_class;
 typedef struct mca_pml_proc_t mca_pml_teg_proc_t;
 
+/**
+ * Return the mca_pml_proc_t instance cached in the communicators local group.
+ * 
+ * @param comm   Communicator
+ * @param rank   Peer rank
+ * @return       mca_pml_proc_t instance
+ */
 
 static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_local(lam_communicator_t* comm, int rank)
 {
@@ -36,6 +44,14 @@ static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_local(lam_communicator_t*
     return proc->proc_pml;
 }
 
+/**
+ * Return the mca_pml_proc_t instance cached on the communicators remote group.
+ * 
+ * @param comm   Communicator
+ * @param rank   Peer rank
+ * @return       mca_pml_proc_t instance
+ */
+
 static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_remote(lam_communicator_t* comm, int rank)
 {
     lam_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank];
diff --git a/src/mca/pml/teg/src/pml_teg_progress.c b/src/mca/pml/teg/src/pml_teg_progress.c
index 2a50376658..a1897e84fb 100644
--- a/src/mca/pml/teg/src/pml_teg_progress.c
+++ b/src/mca/pml/teg/src/pml_teg_progress.c
@@ -4,6 +4,7 @@
 
 int mca_pml_teg_progress(void)
 {
+#if 0
     mca_ptl_base_tstamp_t tstamp;
     size_t i;
 
@@ -12,6 +13,7 @@ int mca_pml_teg_progress(void)
      */
     for(i=0; i<mca_pml_teg.teg_num_ptl_modules; i++)
         mca_pml_teg.teg_ptl_modules[i]->ptlm_progress(tstamp);
+#endif
     return LAM_SUCCESS;
 }
 
diff --git a/src/mca/pml/teg/src/pml_teg_recvreq.h b/src/mca/pml/teg/src/pml_teg_recvreq.h
index 7a1a6070d7..a8e75bb74c 100644
--- a/src/mca/pml/teg/src/pml_teg_recvreq.h
+++ b/src/mca/pml/teg/src/pml_teg_recvreq.h
@@ -1,7 +1,9 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef LAM_PML_TEG_RECV_REQUEST_H
 #define LAM_PML_TEG_RECV_REQUEST_H
 
@@ -11,38 +13,56 @@
 #include "mca/ptl/base/ptl_base_recvfrag.h"
 
 
-/*
- *  Allocate a recv request. 
+/**
+ *  Allocate a recv request from the modules free list.
+ *
+ *  @param rc (OUT)  LAM_SUCCESS or error status on failure.
+ *  @return          Receive request.
  */
 static inline mca_ptl_base_recv_request_t* mca_pml_teg_recv_request_alloc(int *rc)
 {
     return (mca_ptl_base_recv_request_t*)lam_free_list_get(&mca_pml_teg.teg_recv_requests, rc);
 }
 
+/**
+ *  Return a recv request to the modules free list.
+ *
+ *  @param request (IN)  Receive request.
+ */
 static inline void mca_pml_teg_recv_request_return(mca_ptl_base_recv_request_t* request)
 {
     lam_free_list_return(&mca_pml_teg.teg_recv_requests, (lam_list_item_t*)request);
 }
 
-/*
- * Progress an initialized request.
+/**
+ * Start an initialized request.
+ *
+ * @param request  Receive request.
+ * @return         LAM_SUCESS or error status on failure.
  */
-static inline int mca_pml_teg_recv_request_start(mca_ptl_base_recv_request_t* req)
+static inline int mca_pml_teg_recv_request_start(mca_ptl_base_recv_request_t* request)
 {
     THREAD_SCOPED_LOCK(&mca_pml_teg.teg_lock,
-        (req->req_sequence = mca_pml_teg.teg_recv_sequence++));
+        (request->req_sequence = mca_pml_teg.teg_recv_sequence++));
 
-    if(req->super.req_peer == LAM_ANY_SOURCE) {
-        mca_ptl_base_recv_request_match_wild(req);
+    if(request->super.req_peer == LAM_ANY_SOURCE) {
+        mca_ptl_base_recv_request_match_wild(request);
     } else {
-        mca_ptl_base_recv_request_match_specific(req);
+        mca_ptl_base_recv_request_match_specific(request);
     }
     return LAM_SUCCESS;
 }
 
+/**
+ *  Update status of a recv request based on the completion status of 
+ *  the receive fragment.
+ *
+ *  @param request (IN)  Receive request.
+ *  @param frag (IN)     Receive fragment.
+ */
 void mca_pml_teg_recv_request_progress(
-    mca_ptl_base_recv_request_t* recv_request,
-    mca_ptl_base_recv_frag_t* recv_frag
+    mca_ptl_base_recv_request_t* request,
+    mca_ptl_base_recv_frag_t* frag
 );
 
 
diff --git a/src/mca/pml/teg/src/pml_teg_sendreq.c b/src/mca/pml/teg/src/pml_teg_sendreq.c
index 4a545d0a13..5a6ae76b2d 100644
--- a/src/mca/pml/teg/src/pml_teg_sendreq.c
+++ b/src/mca/pml/teg/src/pml_teg_sendreq.c
@@ -25,19 +25,19 @@ void mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req)
     mca_pml_proc_t* proc_pml = proc->proc_pml;
 
     /* allocate remaining bytes to PTLs */
-    size_t bytes_remaining = req->super.req_length - req->req_offset;
+    size_t bytes_remaining = req->req_packed_size - req->req_offset;
     size_t num_ptl_avail = proc_pml->proc_ptl_next.ptl_size;
     size_t num_ptl = 0;
     while(bytes_remaining > 0 && num_ptl++ < num_ptl_avail) {
         mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_next(&proc_pml->proc_ptl_next);
         mca_ptl_t* ptl = ptl_proc->ptl;
+        int rc;
 
         /* if this is the last PTL that is available to use, or the number of 
          * bytes remaining in the message is less than the PTLs minimum fragment 
          * size, then go ahead and give the rest of the message to this PTL.
          */
         size_t bytes_to_frag;
-		int rc;
         if(num_ptl == num_ptl_avail || bytes_remaining < ptl->ptl_min_frag_size)
             bytes_to_frag = bytes_remaining;
 
@@ -47,14 +47,14 @@ void mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req)
          * previously assigned)
          */
         else {
-            bytes_to_frag = ptl_proc->ptl_weight * req->super.req_length;
+            bytes_to_frag = ptl_proc->ptl_weight * req->req_packed_size;
             if(bytes_to_frag > bytes_remaining)
                 bytes_to_frag = bytes_remaining;
         }
 
         rc = ptl->ptl_send(ptl, ptl_proc->ptl_peer, req, bytes_to_frag, 0);
         if(rc == LAM_SUCCESS)
-            bytes_remaining = req->super.req_length - req->req_offset;
+            bytes_remaining = req->req_packed_size - req->req_offset;
     }
 
     /* unable to complete send - signal request failed */
@@ -76,7 +76,7 @@ void mca_pml_teg_send_request_progress(
     bool complete = false;
     lam_mutex_lock(&mca_pml_teg.teg_request_lock);
     req->req_bytes_sent += frag->super.frag_size;
-    if (req->req_bytes_sent >= req->super.req_length) {
+    if (req->req_bytes_sent >= req->req_packed_size) {
         req->super.req_mpi_done = true;
         req->super.req_pml_done = true;
         if(mca_pml_teg.teg_request_waiting) {
diff --git a/src/mca/pml/teg/src/pml_teg_sendreq.h b/src/mca/pml/teg/src/pml_teg_sendreq.h
index ea88fc5ba9..25ff819baf 100644
--- a/src/mca/pml/teg/src/pml_teg_sendreq.h
+++ b/src/mca/pml/teg/src/pml_teg_sendreq.h
@@ -1,7 +1,9 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef LAM_PML_TEG_SEND_REQUEST_H
 #define LAM_PML_TEG_SEND_REQUEST_H
 
@@ -47,8 +49,8 @@ static inline int mca_pml_teg_send_request_start(
     int flags, rc;
 
     /* start the first fragment */
-    if(req->super.req_length <= first_fragment_size) {
-        first_fragment_size = req->super.req_length;
+    if(req->req_packed_size <= first_fragment_size) {
+        first_fragment_size = req->req_packed_size;
         flags = (req->req_send_mode == MCA_PML_BASE_SEND_SYNCHRONOUS) ? MCA_PTL_FLAGS_ACK_MATCHED : 0;
     } else {
         /* require match for first fragment of a multi-fragment message or if synchronous send */
diff --git a/src/mca/pml/teg/src/pml_teg_start.c b/src/mca/pml/teg/src/pml_teg_start.c
index bc4c396bd3..b97d738af2 100644
--- a/src/mca/pml/teg/src/pml_teg_start.c
+++ b/src/mca/pml/teg/src/pml_teg_start.c
@@ -3,16 +3,30 @@
 #include "pml_teg_sendreq.h"
 
 
-int mca_pml_teg_start(lam_request_t** request)
+int mca_pml_teg_start(size_t count, lam_request_t** requests)
 {
-    mca_pml_base_request_t *pml_request = *(mca_pml_base_request_t**)request;
-    switch(pml_request->req_type) {
-        case MCA_PML_REQUEST_SEND:
-            return mca_pml_teg_send_request_start((mca_ptl_base_send_request_t*)pml_request);
-        case MCA_PML_REQUEST_RECV:
-            return mca_pml_teg_recv_request_start((mca_ptl_base_recv_request_t*)pml_request);
-        default:
-            return LAM_ERROR;
+    int rc;
+    size_t i;
+    for(i=0; i<count; i++) {
+        mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i];
+        if(NULL == pml_request)
+            continue;
+
+        switch(pml_request->req_type) {
+            case MCA_PML_REQUEST_SEND:
+                if((rc = mca_pml_teg_send_request_start((mca_ptl_base_send_request_t*)pml_request)) 
+                    != LAM_SUCCESS)
+                    return rc;
+                break;
+            case MCA_PML_REQUEST_RECV:
+                if((rc = mca_pml_teg_recv_request_start((mca_ptl_base_recv_request_t*)pml_request)) 
+                    != LAM_SUCCESS)
+                    return rc;
+                break;
+            default:
+                return LAM_ERROR;
+        }
     }
+    return LAM_SUCCESS;
 }
 
diff --git a/src/mca/pml/teg/src/pml_teg_test.c b/src/mca/pml/teg/src/pml_teg_test.c
index 722f46b896..555b88179e 100644
--- a/src/mca/pml/teg/src/pml_teg_test.c
+++ b/src/mca/pml/teg/src/pml_teg_test.c
@@ -2,18 +2,74 @@
 
                                                                                                                               
 int mca_pml_teg_test(
-    lam_request_t** request,
+    size_t count,
+    lam_request_t** requests,
+    int *index,
     int *completed,
     lam_status_public_t* status)
 {
-    mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)request;
-    if(pml_request->req_mpi_done) {
-        *completed = true;
-        mca_pml_teg_free(request);
-        if (status != NULL)
-            *status = pml_request->req_status;
-    } else {
+    size_t i;
+    for(i=0; i<count; i++) {
+        mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
+        if(pml_request == NULL)
+            continue;
+        if(pml_request->req_mpi_done) {
+            *index = i;
+            *completed = true;
+            if (NULL != status)
+                *status = pml_request->req_status;
+            if(false == pml_request->req_persistent)
+                mca_pml_teg_free(requests+i);
+        }
+    }
+
+    *index = MPI_UNDEFINED;
+    *completed = false;
+    if(NULL != status) 
+        *status = mca_pml_teg.teg_request_null.req_status;
+    return LAM_SUCCESS;
+}
+
+
+int mca_pml_teg_test_all(
+    size_t count,
+    lam_request_t** requests,
+    int *completed,
+    lam_status_public_t* statuses)
+{
+    size_t i;
+    size_t num_completed;
+    for(i=0; i<count; i++) {
+        mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
+        if(pml_request == NULL || pml_request->req_mpi_done)
+            num_completed++;
+    }
+
+    if(num_completed != count) {
         *completed = false;
+        return LAM_SUCCESS;
+    }
+
+    *completed = true;
+    if(NULL != statuses) {
+        /* fill out completion status and free request if required */
+        for(i=0; i<count; i++) {
+            mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
+            if(NULL == pml_request) {
+                statuses[i] = mca_pml_teg.teg_request_null.req_status;
+            } else {
+                statuses[i] = pml_request->req_status;
+                if(false == pml_request->req_persistent)
+                    mca_pml_teg_free(requests+i);
+            }
+        }
+    } else {
+        /* free request if required */
+        for(i=0; i<count; i++) {
+            mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
+            if(NULL != pml_request && false == pml_request->req_persistent)
+                mca_pml_teg_free(requests+i);
+        }
     }
     return LAM_SUCCESS;
 }
diff --git a/src/mca/pml/teg/src/pml_teg_wait.c b/src/mca/pml/teg/src/pml_teg_wait.c
index 15e4fbb569..df06e68aed 100644
--- a/src/mca/pml/teg/src/pml_teg_wait.c
+++ b/src/mca/pml/teg/src/pml_teg_wait.c
@@ -46,15 +46,13 @@ int mca_pml_teg_wait(
     }
 
     /* return request to pool */
-    if(pml_request->req_persistent == false) {
+    if(false == pml_request->req_persistent) {
         mca_pml_teg_free(request);
     }
-    if (status != NULL) {
+    if (NULL != status) {
        *status = pml_request->req_status;
     }
-    if (index != NULL) {
-       *index = completed;
-    }
+    *index = completed;
     return LAM_SUCCESS;
 }
 
@@ -64,46 +62,58 @@ int mca_pml_teg_wait_all(
     lam_request_t** requests,
     lam_status_public_t* statuses)
 {
-    int completed, i;
-
-    /*
-     * acquire lock and test for completion - if all requests are not completed
-     * pend on condition variable until a request completes 
-     */
-    lam_mutex_lock(&mca_pml_teg.teg_request_lock);
-    mca_pml_teg.teg_request_waiting++;
-    do {
-        completed = 0;
-        for(i=0; i<count; i++) {
-            mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
-            if(pml_request == NULL || pml_request->req_mpi_done == true) {
-                completed++;
-                continue;
-            }
-        } 
-        if(completed != count)
-            lam_condition_wait(&mca_pml_teg.teg_request_cond, &mca_pml_teg.teg_request_lock);
-    } while (completed != count);
-    mca_pml_teg.teg_request_waiting--;
-    lam_mutex_unlock(&mca_pml_teg.teg_request_lock);
-
-    /* 
-     * fill out completion status and free request if required 
-     */
+    int completed = 0, i;
     for(i=0; i<count; i++) {
         mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
-        if (NULL == pml_request) {
-            if(NULL != statuses)
-                statuses[i] = mca_pml_teg.teg_null.req_status;
-            continue;
-        }
+        if(pml_request == NULL || pml_request->req_mpi_done == true) {
+            completed++;
+        } 
+    }
 
-        if (NULL != statuses) {
-            statuses[i] = pml_request->req_status;
+    /* if all requests have not completed -- defer requiring lock unless required */
+    if(completed != count) {
+        /*
+         * acquire lock and test for completion - if all requests are not completed
+         * pend on condition variable until a request completes 
+         */
+        lam_mutex_lock(&mca_pml_teg.teg_request_lock);
+        mca_pml_teg.teg_request_waiting++;
+        do {
+            completed = 0;
+            for(i=0; i<count; i++) {
+                mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
+                if(pml_request == NULL || pml_request->req_mpi_done == true) {
+                    completed++;
+                    continue;
+                }
+            } 
+            if(completed != count)
+                lam_condition_wait(&mca_pml_teg.teg_request_cond, &mca_pml_teg.teg_request_lock);
+        } while (completed != count);
+        mca_pml_teg.teg_request_waiting--;
+        lam_mutex_unlock(&mca_pml_teg.teg_request_lock);
+    }
+
+    if(NULL != statuses) {
+        /* fill out status and free request if required */
+        for(i=0; i<count; i++) {
+            mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
+            if (NULL == pml_request) {
+                statuses[i] = mca_pml_teg.teg_request_null.req_status;
+            } else {
+                statuses[i] = pml_request->req_status;
+                if (false == pml_request->req_persistent) {
+                    mca_pml_teg_free(&requests[i]);
+                }
+            }
         }
-        if (false == pml_request->req_persistent) {
-            /* return request to pool */
-            mca_pml_teg_free(&requests[i]);
+    } else {
+        /* free request if required */
+        for(i=0; i<count; i++) {
+            mca_pml_base_request_t* pml_request = (mca_pml_base_request_t*)requests[i];
+            if (NULL != pml_request && false == pml_request->req_persistent) {
+                    mca_pml_teg_free(&requests[i]);
+            }
         }
     }
     return LAM_SUCCESS;
diff --git a/src/mca/ptl/base/base.h b/src/mca/ptl/base/base.h
index 881827f227..1681a743c5 100644
--- a/src/mca/ptl/base/base.h
+++ b/src/mca/ptl/base/base.h
@@ -1,7 +1,9 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_BASE_H
 #define MCA_PTL_BASE_H
 
diff --git a/src/mca/ptl/base/ptl_base_comm.c b/src/mca/ptl/base/ptl_base_comm.c
index 6ddab15e47..a29b3944e3 100644
--- a/src/mca/ptl/base/ptl_base_comm.c
+++ b/src/mca/ptl/base/ptl_base_comm.c
@@ -23,6 +23,7 @@ static void mca_pml_ptl_comm_construct(mca_pml_ptl_comm_t* comm)
     OBJ_CONSTRUCT(&comm->c_wild_lock, lam_mutex_t);
 }
 
+
 static void mca_pml_ptl_comm_destruct(mca_pml_ptl_comm_t* comm)
 {
     free(comm->c_msg_seq);
diff --git a/src/mca/ptl/base/ptl_base_comm.h b/src/mca/ptl/base/ptl_base_comm.h
index 5a90b315bf..0621be3719 100644
--- a/src/mca/ptl/base/ptl_base_comm.h
+++ b/src/mca/ptl/base/ptl_base_comm.h
@@ -1,3 +1,9 @@
+/*
+ * $HEADER$
+ */
+/**
+ * @file
+ */
 #ifndef MCA_PML_COMM_H
 #define MCA_PML_COMM_H
 
@@ -6,49 +12,44 @@
 #include "mca/ptl/ptl.h"
 #include "lfc/lam_list.h"
 
-/*
- *  Structure associated w/ lam_communicator_t that contains data 
- *  specific to the PML. 
- */
-
 extern lam_class_t mca_pml_ptl_comm_t_class;
 
+/**
+ *  Cached on lam_communicator_t to hold queues/state
+ *  used by the PML<->PTL interface for matching logic. 
+ */
 struct mca_pml_comm_t {
     lam_object_t super;
-
-    /* send message sequence-number support - sender side */
-    mca_ptl_base_sequence_t *c_msg_seq;
-
-    /* send message sequence-number support - receiver side */
-    mca_ptl_base_sequence_t *c_next_msg_seq;
-
-    /* matching lock */
-    lam_mutex_t *c_matching_lock;
-
-    /* unexpected fragments queues */
-    lam_list_t *c_unexpected_frags;
-
-    /* these locks are needed to avoid a probe interfering with a match */
-    lam_mutex_t *c_unexpected_frags_lock;
-
-    /* out-of-order fragments queues */
-    lam_list_t *c_frags_cant_match;
-
-    /* queues of unmatched specific (source process specified) receives
-     * sorted by source process */
-    lam_list_t *c_specific_receives;
-
-    /* queue of unmatched wild (source process not specified) receives
-     * */
-    lam_list_t c_wild_receives;
-
-    /* protect access to wild receives */
-    lam_mutex_t c_wild_lock;
+    mca_ptl_base_sequence_t *c_msg_seq;      /**< send message sequence number - sender side */
+    mca_ptl_base_sequence_t *c_next_msg_seq; /**< send message sequence number - receiver side */
+    lam_mutex_t *c_matching_lock;            /**< matching lock */
+    lam_list_t *c_unexpected_frags;          /**< unexpected fragment queues */
+    lam_mutex_t *c_unexpected_frags_lock;    /**< unexpected fragment locks */
+    lam_list_t *c_frags_cant_match;          /**< out-of-order fragment queues */
+    lam_list_t *c_specific_receives;         /**< queues of unmatched specific (source process specified) receives */
+    lam_list_t c_wild_receives;              /**< queue of unmatched wild (source process not specified) receives */
+    lam_mutex_t c_wild_lock;                 /**< lock to protect access to wild receives */
 };
 typedef struct mca_pml_comm_t mca_pml_ptl_comm_t;
 
 
-extern int mca_pml_ptl_comm_init_size(mca_pml_ptl_comm_t*, size_t);
+/**
+ * Initialize an instance of mca_pml_ptl_comm_t based on the communicator size.
+ *
+ * @param  comm   Instance of mca_pml_ptl_comm_t
+ * @param  size   Size of communicator 
+ * @return        LAM_SUCCESS or error status on failure.
+ */
+
+extern int mca_pml_ptl_comm_init_size(mca_pml_ptl_comm_t* comm, size_t size);
+
+/**
+ * Obtain the next sequence number (MPI) for a given destination rank.
+ *
+ * @param  comm   Instance of mca_pml_ptl_comm_t
+ * @param  dst    Rank of destination.
+ * @return        Next available sequence number.
+ */
 
 static inline mca_ptl_base_sequence_t mca_pml_ptl_comm_send_sequence(mca_pml_ptl_comm_t* comm, int dst)
 {
diff --git a/src/mca/ptl/base/ptl_base_fragment.h b/src/mca/ptl/base/ptl_base_fragment.h
index 99ec041a8a..f903c1f488 100644
--- a/src/mca/ptl/base/ptl_base_fragment.h
+++ b/src/mca/ptl/base/ptl_base_fragment.h
@@ -1,25 +1,30 @@
 /*
  * $HEADER$
  */
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-
+/** 
+ * @file
+ */
 #ifndef MCA_PML_BASE_FRAGMENT_H
 #define MCA_PML_BASE_FRAGMENT_H
 
 #include "lfc/lam_list.h"
 #include "mca/ptl/ptl.h"
+#include "datatype/datatype.h"
 #include "mca/ptl/base/ptl_base_header.h"
 
-
 extern lam_class_t mca_ptl_base_frag_t_class;
 
+/**
+ * Base type for fragment descriptors. 
+ */
 struct mca_ptl_base_frag_t {
-    lam_list_item_t super;
-    mca_ptl_base_header_t frag_header;
+    lam_list_item_t super; /**< allow the fragment to be placed on a list */
+    mca_ptl_base_header_t frag_header; /**< header used for fragment matching */
     struct mca_ptl_t* frag_owner; /**< PTL that allocated this fragment */
     struct mca_ptl_base_peer_t* frag_peer; /**< PTL specific addressing info */
-    void  *frag_addr; /* pointer into request buffer at fragment offset */
-    size_t frag_size; /* number of bytes available in request buffer */
+    void  *frag_addr; /**< pointer into request buffer at fragment offset */
+    size_t frag_size; /**< number of bytes available in request buffer */
+    lam_convertor_t frag_convertor; /**< datatype convertor for fragment packing/unpacking */
 };
 typedef struct mca_ptl_base_frag_t mca_ptl_base_frag_t;
 
diff --git a/src/mca/ptl/base/ptl_base_header.h b/src/mca/ptl/base/ptl_base_header.h
index 7dac91e2eb..2ba8617a52 100644
--- a/src/mca/ptl/base/ptl_base_header.h
+++ b/src/mca/ptl/base/ptl_base_header.h
@@ -1,7 +1,9 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_BASE_HEADER_H
 #define MCA_PTL_BASE_HEADER_H
 
@@ -17,80 +19,60 @@
 #define MCA_PTL_FLAGS_ACK_AGGREGATE   2
 
 
-/* Defines the common header attributes - must be first element in each header type */
+/**
+ * Common header attributes - must be first element in each header type 
+ */
 struct mca_ptl_base_common_header_t {
-    /* type of envelope */
-    uint8_t hdr_type;
-    /* flags indicating how fragment should be processed */
-    uint8_t hdr_flags;
-    /* size of header - allow for variable length */
-    uint16_t hdr_size;
+    uint8_t hdr_type;  /**< type of envelope */
+    uint8_t hdr_flags; /**< flags indicating how fragment should be processed */
+    uint16_t hdr_size; /**< size of header - allow for variable length */
 };
 typedef struct mca_ptl_base_common_header_t mca_ptl_base_common_header_t;
 
 
-/* 
- *  Common header definition for all fragments.
+/**
+ *  Basic header for all fragments.
  */
-
 struct mca_ptl_base_frag_header_t {
-    /* common header */
-    mca_ptl_base_common_header_t hdr_common;
-    /* fragment length */
-    uint32_t hdr_frag_length;
-    /* offset into message */
-    uint32_t hdr_frag_offset;
-    /* fragment sequence number */
-    mca_ptl_base_sequence_t hdr_frag_seq;
-    /* pointer to source fragment */
-    lam_ptr_t hdr_src_ptr;
-    /* pointer to matched receive */
-    lam_ptr_t hdr_dst_ptr;
+    mca_ptl_base_common_header_t hdr_common; /**< common attributes */
+    uint32_t hdr_frag_length;                /**< fragment length */
+    uint32_t hdr_frag_offset;                /**< offset into message */
+    mca_ptl_base_sequence_t hdr_frag_seq;    /**< fragment sequence number */
+    lam_ptr_t hdr_src_ptr;                   /**< pointer to source fragment */
+    lam_ptr_t hdr_dst_ptr;                   /**< pointer to matched receive */
 };
 typedef struct mca_ptl_base_frag_header_t mca_ptl_base_frag_header_t;
 
 
-/* 
+/**
  *  Header definition for the first fragment, contains the additional
  *  attributes required to match the corresponding posted receive.
  */
-
 struct mca_ptl_base_match_header_t {
-    /* fragment info */
-    mca_ptl_base_frag_header_t hdr_frag;
-    /* communicator index */
-    uint32_t hdr_contextid;
-    /* source rank */
-    int32_t hdr_src;
-    /* destination rank */
-    int32_t hdr_dst;
-    /* user tag */
-    int32_t hdr_tag;
-    /* message length */
-    uint32_t hdr_msg_length;
-    /* message sequence number */
-    mca_ptl_base_sequence_t hdr_msg_seq;
+    mca_ptl_base_frag_header_t hdr_frag; /**< fragment attributes */
+    uint32_t hdr_contextid;              /**< communicator index */
+    int32_t hdr_src;                     /**< source rank */
+    int32_t hdr_dst;                     /**< destination rank */
+    int32_t hdr_tag;                     /**< user tag */
+    uint32_t hdr_msg_length;             /**< message length */
+    mca_ptl_base_sequence_t hdr_msg_seq; /**< message sequence number */
 };
 typedef struct mca_ptl_base_match_header_t mca_ptl_base_match_header_t;
 
 
-/*
+/**
  *  Header used to acknowledgment outstanding fragment(s).
  */
-
 struct mca_ptl_base_ack_header_t {
-    /* common header */
-    mca_ptl_base_common_header_t hdr_common;
-    /* source fragment */
-    lam_ptr_t hdr_src_ptr;
-    /* matched receive request */
-    lam_ptr_t hdr_dst_ptr;
-    /* sequence range */
+    mca_ptl_base_common_header_t hdr_common; /**< common attributes */
+    lam_ptr_t hdr_src_ptr;                   /**< source fragment */
+    lam_ptr_t hdr_dst_ptr;                   /**< matched receive request */
+    /* sequence range? */
 };
 typedef struct mca_ptl_base_ack_header_t mca_ptl_base_ack_header_t;
 
 
-/*
+/**
  * Union of defined header types.
  */
 union mca_ptl_base_header_t {
diff --git a/src/mca/ptl/base/ptl_base_match.c b/src/mca/ptl/base/ptl_base_match.c
index c039ab84ae..47e5c44135 100644
--- a/src/mca/ptl/base/ptl_base_match.c
+++ b/src/mca/ptl/base/ptl_base_match.c
@@ -535,7 +535,7 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche
 {
     /* local parameters */
     int match_found;
-    mca_ptl_base_sequence_t next_msg_seq_expected, frag_seqber;
+    mca_ptl_base_sequence_t next_msg_seq_expected, frag_seq;
     mca_ptl_base_recv_frag_t *frag_desc;
     mca_ptl_base_recv_request_t *matched_receive;
 
@@ -567,8 +567,8 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche
             /*
              * If the message has the next expected seq from that proc...
              */
-            frag_seqber=frag_desc->super.frag_header.hdr_match.hdr_msg_seq;
-            if (frag_seqber == next_msg_seq_expected) {
+            frag_seq=frag_desc->super.frag_header.hdr_match.hdr_msg_seq;
+            if (frag_seq == next_msg_seq_expected) {
 
                 /* We're now expecting the next sequence number. */
                 (pml_comm->c_next_msg_seq[frag_src])++;
@@ -617,7 +617,7 @@ static void mca_ptl_base_check_cantmatch_for_match(lam_list_t *additional_matche
                  * and re-start search for next sequence number */
                 break;
 
-            } /* end if (frag_seqber == next_msg_seq_expected) */
+            } /* end if (frag_seq == next_msg_seq_expected) */
             
         } /* end for (frag_desc) loop */
         
diff --git a/src/mca/ptl/base/ptl_base_match.h b/src/mca/ptl/base/ptl_base_match.h
index 38d03b2e53..756bdb28c0 100644
--- a/src/mca/ptl/base/ptl_base_match.h
+++ b/src/mca/ptl/base/ptl_base_match.h
@@ -1,13 +1,23 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_BASE_MATCH_H
 #define MCA_PTL_BASE_MATCH_H
 
 struct mca_ptl_base_recv_frag_t;
 
-
+/**
+ * Match incoming fragments against posted receives.
+ * 
+ * @param frag_header (IN)          Header of received fragment.
+ * @param frag_desc (IN)            Received fragment descriptor.
+ * @param match_made (OUT)          Flag indicating wether a match was made.
+ * @param additional_matches (OUT)  List of additional matches if a match was made.
+ * @return                          LAM_SUCCESS or error status on failure.
+ */
 int mca_ptl_base_match(mca_ptl_base_match_header_t *frag_header,
     struct mca_ptl_base_recv_frag_t *frag_desc, bool *match_made,
     lam_list_t *additional_matches);
diff --git a/src/mca/ptl/base/ptl_base_recvfrag.h b/src/mca/ptl/base/ptl_base_recvfrag.h
index a3ac539ba5..3849bf873c 100644
--- a/src/mca/ptl/base/ptl_base_recvfrag.h
+++ b/src/mca/ptl/base/ptl_base_recvfrag.h
@@ -1,8 +1,9 @@
 /*
  * $HEADER$
  */
-/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_BASE_RECVFRAG_H
 #define MCA_PTL_BASE_RECVFRAG_H
 
@@ -13,43 +14,85 @@
 
 extern lam_class_t mca_ptl_base_recv_frag_t_class;
 
-
+/**
+ * Base type for receive fragment descriptors.
+ */
 struct mca_ptl_base_recv_frag_t {
-    mca_ptl_base_frag_t super;
-    mca_ptl_base_recv_request_t *frag_request; /* matched posted receive */
+    mca_ptl_base_frag_t super; /**< base fragment descriptor */
+    mca_ptl_base_recv_request_t *frag_request; /**< matched posted receive */
+    bool frag_is_buffered; /**< does fragment need to be unpacked into users buffer */
 };
 typedef struct mca_ptl_base_recv_frag_t mca_ptl_base_recv_frag_t;
 
 
-
+/**
+ * Initialize the receive fragment after a match has been made.
+ * 
+ * @param frag (IN)  Receive fragment descriptor.
+ *
+ * If a buffer has not already been allocated, determine the
+ * offset into the users buffer (if contigous data), or allocate
+ * a buffer for the non-contigous case. 
+ *
+ * TODO: may need to pass in an allocator....
+ */
 static inline void mca_ptl_base_recv_frag_init(mca_ptl_base_recv_frag_t* frag)
 {
     mca_ptl_base_recv_request_t* request = frag->frag_request;
-    mca_ptl_base_frag_header_t* header = &frag->super.frag_header.hdr_frag;
+    mca_ptl_base_match_header_t* header = &frag->super.frag_header.hdr_match;
 
-    /* determine the offset and size of posted buffer */
-    if (request->super.req_length < header->hdr_frag_offset) {
+    /* initialize status */
+    request->super.req_status.MPI_SOURCE = header->hdr_src;
+    request->super.req_status.MPI_TAG = header->hdr_tag;
+    request->super.req_status.MPI_ERROR = LAM_SUCCESS;
+    request->super.req_status._count = header->hdr_msg_length;
 
-        /* user buffer is to small - discard entire fragment */
-        frag->super.frag_addr = 0;
-        frag->super.frag_size = 0;
+    if(header->hdr_frag.hdr_frag_length > 0) {
 
-    } else if (request->super.req_length < header->hdr_frag_offset + header->hdr_frag_length) {
+        /* initialize receive convertor */
+        lam_proc_t *proc = 
+            lam_comm_peer_lookup(request->super.req_comm, request->super.req_peer);
+        lam_convertor_copy(proc->proc_convertor, &frag->super.frag_convertor);
+        lam_convertor_init_for_recv(
+            &frag->super.frag_convertor,  /* convertor */
+            0,                            /* flags */
+            request->super.req_datatype,  /* datatype */
+            request->super.req_count,     /* count elements */
+            request->super.req_addr,      /* users buffer */
+            header->hdr_frag.hdr_frag_offset);  /* offset in bytes into packed buffer */
 
-        /* user buffer is to small - discard part of fragment */
-        frag->super.frag_addr = ((unsigned char*)request->super.req_addr + header->hdr_frag_offset);
-        frag->super.frag_size = request->super.req_length - header->hdr_frag_offset;
-
-    } else {
-
-        /* user buffer is large enough for this fragment */
-        frag->super.frag_addr = ((unsigned char*)request->super.req_addr + header->hdr_frag_offset);
-        frag->super.frag_size = header->hdr_frag_length;
+        /* if buffer has not already been allocated for eager
+         * send - go ahead and figure out offset into users 
+         * buffer (for contigous data) - or allocate a buffer
+         * for the receive if required.
+        */
+        if(NULL == frag->super.frag_addr) {
+            struct iovec iov;
+            iov.iov_base = NULL;
+            iov.iov_len = header->hdr_frag.hdr_frag_length;
+            lam_convertor_unpack(&frag->super.frag_convertor, &iov, 1);
 
+            /* non-contiguous - allocate buffer for receive */
+            if(NULL == iov.iov_base) {
+                frag->super.frag_addr = malloc(iov.iov_len);
+                frag->frag_is_buffered = true;
+            /* we now have correct offset into users buffer */
+            } else {
+                frag->super.frag_addr = iov.iov_base;
+                frag->frag_is_buffered = false;
+            }
+            frag->super.frag_size = header->hdr_frag.hdr_frag_length;
+        }
     }
 }
                                                                                                                    
-
+/**
+ * Called by the PTL to match attempt a match for new fragments.
+ * 
+ * @param frag (IN)     Receive fragment descriptor.
+ * @param header (IN)   Header corresponding to the receive fragment.
+ * @return              LAM_SUCCESS or error status on failure.
+ */
 static inline int mca_ptl_base_recv_frag_match(
     mca_ptl_base_recv_frag_t* frag, 
     mca_ptl_base_match_header_t* header)
@@ -70,7 +113,7 @@ static inline int mca_ptl_base_recv_frag_match(
             mca_ptl_base_recv_frag_init(frag);
             
             /* notify ptl of match */
-            ptl->ptl_recv(ptl, frag, &frag->frag_request->super.req_status);
+            ptl->ptl_recv(ptl, frag);
 
             /* process any additional fragments that arrived out of order */
             frag = (mca_ptl_base_recv_frag_t*)lam_list_remove_first(&matched_frags);
diff --git a/src/mca/ptl/base/ptl_base_recvreq.c b/src/mca/ptl/base/ptl_base_recvreq.c
index 0e1824cc9f..f8fa8c1239 100644
--- a/src/mca/ptl/base/ptl_base_recvreq.c
+++ b/src/mca/ptl/base/ptl_base_recvreq.c
@@ -51,7 +51,7 @@ void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t* reque
         mca_ptl_t* ptl = frag->super.frag_owner;
         THREAD_UNLOCK(pml_comm->c_matching_lock+req_peer);
         mca_ptl_base_recv_frag_init(frag);
-        ptl->ptl_recv(ptl, frag, &request->super.req_status);
+        ptl->ptl_recv(ptl, frag);
         return; /* match found */
     }
 
@@ -95,8 +95,8 @@ void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t* request)
         if ((frag = mca_ptl_base_recv_request_match_specific_proc(request, proc)) != NULL) {
             mca_ptl_t* ptl = frag->super.frag_owner;
             THREAD_UNLOCK(pml_comm->c_matching_lock+proc);
-            mca_ptl_base_recv_frag_init(frag); 
-            ptl->ptl_recv(ptl, frag, &request->super.req_status);
+            mca_ptl_base_recv_frag_init(frag);
+            ptl->ptl_recv(ptl, frag);
             return; /* match found */
         }
         THREAD_UNLOCK(pml_comm->c_matching_lock+proc);
diff --git a/src/mca/ptl/base/ptl_base_recvreq.h b/src/mca/ptl/base/ptl_base_recvreq.h
index 9a2904c074..874c90b570 100644
--- a/src/mca/ptl/base/ptl_base_recvreq.h
+++ b/src/mca/ptl/base/ptl_base_recvreq.h
@@ -1,7 +1,9 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PML_BASE_RECV_REQUEST_H
 #define MCA_PML_BASE_RECV_REQUEST_H
 
@@ -11,25 +13,33 @@
 extern lam_class_t mca_ptl_base_recv_request_t_class;
 struct mca_ptl_base_recv_frag_t;
 
-
+/**
+ * Base type for receive requests.
+ */
 struct mca_ptl_base_recv_request_t {
-   mca_pml_base_request_t super;
-   /* request sequence number */
-   mca_ptl_base_sequence_t req_sequence;
-   /* number of bytes delivered */
-   size_t req_bytes_recvd;
+   mca_pml_base_request_t super;          /**< base request */
+   mca_ptl_base_sequence_t req_sequence;  /**< request sequence number */
+   size_t req_bytes_recvd;                /**< number of bytes delivered to user */
 };
 typedef struct mca_ptl_base_recv_request_t mca_ptl_base_recv_request_t;
 
 
-void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t*);
-void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t*);
-
-
+/**
+ * Initialize a receive request with call parameters.
+ *
+ * @param request (IN)       Receive request.
+ * @param addr (IN)          User buffer.
+ * @param count (IN)         Number of elements of indicated datatype.
+ * @param datatype (IN)      User defined datatype.
+ * @param src (IN)           Source rank w/in the communicator.
+ * @param tag (IN)           User defined tag.
+ * @param comm (IN)          Communicator.
+ * @param persistent (IN)    Is this a ersistent request.
+ */
 static inline void mca_ptl_base_recv_request_init(
     mca_ptl_base_recv_request_t *request,
     void *addr,
-    size_t length,
+    size_t count,
     lam_datatype_t* datatype,
     int src,
     int tag,
@@ -39,11 +49,12 @@ static inline void mca_ptl_base_recv_request_init(
     request->req_sequence = 0;
     request->req_bytes_recvd = 0;
     request->super.req_addr = addr;
-    request->super.req_length = length;
+    request->super.req_count = count;
     request->super.req_datatype = datatype;
     request->super.req_peer = src;
     request->super.req_tag = tag;
     request->super.req_comm = comm;
+    request->super.req_proc = NULL;
     request->super.req_type = MCA_PML_REQUEST_RECV;
     request->super.req_persistent = persistent;
     request->super.req_mpi_done = false;
@@ -52,5 +63,21 @@ static inline void mca_ptl_base_recv_request_init(
     request->super.super.req_type = LAM_REQUEST_PML;
 }
 
+/**
+ * Attempt to match the request against the unexpected fragment list
+ * for all source ranks w/in the communicator.
+ *
+ * @param request (IN)   Request to match.
+ */
+void mca_ptl_base_recv_request_match_wild(mca_ptl_base_recv_request_t* request);
+
+/**
+ * Attempt to match the request against the unexpected fragment list 
+ * for a specific source rank.
+ *
+ * @param request (IN)   Request to match.
+ */
+void mca_ptl_base_recv_request_match_specific(mca_ptl_base_recv_request_t* request);
+
 #endif
 
diff --git a/src/mca/ptl/base/ptl_base_sendfrag.h b/src/mca/ptl/base/ptl_base_sendfrag.h
index d5454ff0c5..ef9305d62a 100644
--- a/src/mca/ptl/base/ptl_base_sendfrag.h
+++ b/src/mca/ptl/base/ptl_base_sendfrag.h
@@ -1,6 +1,9 @@
 /* 
  * $HEADER$
  */
+/**
+ * @file
+ */
 #ifndef MCA_PTL_BASE_SEND_FRAG_H
 #define MCA_PTL_BASE_SEND_FRAG_H
 
@@ -9,10 +12,12 @@
 
 extern lam_class_t mca_ptl_base_send_frag_t_class;
 
-
+/**
+ * Base type for send fragment descriptors 
+ */
 struct mca_ptl_base_send_frag_t {
-    mca_ptl_base_frag_t super;
-    struct mca_ptl_base_send_request_t *frag_request;
+    mca_ptl_base_frag_t super;  /**< base fragment descriptor */
+    struct mca_ptl_base_send_request_t *frag_request;  /**< pointer to send request */
 };
 typedef struct mca_ptl_base_send_frag_t mca_ptl_base_send_frag_t;
 
diff --git a/src/mca/ptl/base/ptl_base_sendreq.c b/src/mca/ptl/base/ptl_base_sendreq.c
index 557282d8be..c38553d112 100644
--- a/src/mca/ptl/base/ptl_base_sendreq.c
+++ b/src/mca/ptl/base/ptl_base_sendreq.c
@@ -1,6 +1,7 @@
 /*
  * $HEADER$
  */
+#include <string.h>
 #include "mca/ptl/base/ptl_base_sendreq.h"
 #include "mca/ptl/base/ptl_base_sendfrag.h"
 
@@ -18,11 +19,9 @@ lam_class_t mca_ptl_base_send_request_t_class = {
 
 static void mca_ptl_base_send_request_construct(mca_ptl_base_send_request_t* req)
 {
-    OBJ_CONSTRUCT(&req->req_unacked_frags, lam_list_t);
 }
 
 static void mca_ptl_base_send_request_destruct(mca_ptl_base_send_request_t* req)
 {
-    OBJ_DESTRUCT(&req->req_unacked_frags);
 }
 
diff --git a/src/mca/ptl/base/ptl_base_sendreq.h b/src/mca/ptl/base/ptl_base_sendreq.h
index c9ac31b0a3..5aa61bc3ec 100644
--- a/src/mca/ptl/base/ptl_base_sendreq.h
+++ b/src/mca/ptl/base/ptl_base_sendreq.h
@@ -1,12 +1,14 @@
 /*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PML_BASE_SEND_REQUEST_H
 #define MCA_PML_BASE_SEND_REQUEST_H
 
 #include "lam_config.h"
-
+#include "datatype/datatype.h"
 #include "mca/ptl/ptl.h"
 #include "mca/pml/base/pml_base_request.h"
 #include "mca/ptl/base/ptl_base_comm.h"
@@ -16,72 +18,100 @@ extern lam_class_t mca_ptl_base_send_request_t_class;
 struct mca_ptl_base_send_frag_t;
 
 
+/**
+ * Base type for send requests 
+ */
 struct mca_ptl_base_send_request_t {
-    /* request object - common data structure for use by wait/test */
-    mca_pml_base_request_t super;
-    /* number of bytes that have already been assigned to a fragment */
-    size_t req_offset;
-    /* number of fragments that have been allocated */
-    size_t req_frags;
-    /* number of bytes that have been sent */
-    size_t req_bytes_sent;
-    /* number of bytes that have been acked */
-    size_t req_bytes_acked;
-    /* type of send */
-    mca_pml_base_send_mode_t req_send_mode;
-    /* sequence number for MPI pt-2-pt ordering */
-    mca_ptl_base_sequence_t req_msg_seq;
-    /* queue of fragments that are waiting to be acknowledged */
-    mca_ptl_base_queue_t req_unacked_frags;
-    /* PTL that allocated this descriptor */
-    struct mca_ptl_t* req_owner;
-    /* PTL peer instance that will be used for first fragment */
-    struct mca_ptl_base_peer_t* req_peer;
-    /* peer matched receive */
-    lam_ptr_t req_peer_request;
+    mca_pml_base_request_t super; /** base request type - common data structure for use by wait/test */
+    size_t req_offset; /**< number of bytes that have already been assigned to a fragment */
+    size_t req_frags; /**< number of fragments that have been allocated */
+    size_t req_bytes_sent; /**< number of bytes that have been sent */
+    mca_pml_base_send_mode_t req_send_mode; /**< type of send */
+    mca_ptl_base_sequence_t req_msg_seq; /**< sequence number for MPI pt-2-pt ordering */
+    struct mca_ptl_t* req_owner; /**< PTL that allocated this descriptor */
+    struct mca_ptl_base_peer_t* req_peer; /**< PTL peer instance that will be used for first fragment */
+    lam_ptr_t req_peer_request; /**< matched receive at peer */
+    lam_convertor_t req_convertor; /**< convertor that describes this datatype */
+    size_t req_packed_size; /**< packed size of a message given the datatype and count */
 };
 typedef struct mca_ptl_base_send_request_t mca_ptl_base_send_request_t;
 
 
-static inline bool mca_ptl_base_send_request_matched(
-    mca_ptl_base_send_request_t* request)
-{
-    return (NULL != request->req_peer_request.pval);
-}
-
 
+/**
+ * Initialize a send request with call parameters.
+ *
+ * @param request (IN)     Send request
+ * @param addr (IN)        User buffer
+ * @param count (IN)       Number of elements of indicated datatype.
+ * @param datatype (IN)    User defined datatype
+ * @param peer (IN)        Destination rank
+ * @param tag (IN)         User defined tag
+ * @param comm (IN)        Communicator
+ * @param mode (IN)        Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
+ * @param persistent (IN)  Is request persistent.
+ */
 static inline void mca_ptl_base_send_request_init(
     mca_ptl_base_send_request_t *request, 
     void *addr, 
-    size_t length, 
+    size_t count, 
     lam_datatype_t* datatype, 
     int peer, 
     int tag, 
     lam_communicator_t* comm, 
-    mca_pml_base_send_mode_t sendmode,
+    mca_pml_base_send_mode_t mode,
     bool persistent) 
 { 
     request->req_offset = 0; 
     request->req_frags = 0; 
     request->req_bytes_sent = 0; 
-    request->req_bytes_acked = 0; 
-    request->req_send_mode = sendmode;
+    request->req_send_mode = mode;
     request->req_peer_request.lval = 0; 
     request->req_msg_seq = mca_pml_ptl_comm_send_sequence(comm->c_pml_comm, peer);
     request->super.req_addr = addr; 
-    request->super.req_length = length; 
+    request->super.req_count = count; 
     request->super.req_datatype = datatype; 
     request->super.req_peer = peer; 
     request->super.req_tag = tag; 
     request->super.req_comm = comm; 
+    request->super.req_proc = lam_comm_peer_lookup(comm,peer);
     request->super.req_type = MCA_PML_REQUEST_SEND; 
     request->super.req_persistent = persistent; 
     request->super.req_mpi_done = false; 
     request->super.req_pml_done = false; 
     request->super.req_free_called = false; 
     request->super.super.req_type = LAM_REQUEST_PML; 
+
+    /* initialize datatype convertor for this request */
+    if(count > 0) {
+        int packed_size;
+        lam_convertor_copy(request->super.req_proc->proc_convertor, &request->req_convertor);
+        lam_convertor_init_for_send(
+            &request->req_convertor, 
+            0,
+            request->super.req_datatype,
+            request->super.req_count,
+            request->super.req_addr,
+            0);
+        lam_convertor_get_packed_size(&request->req_convertor, &packed_size);
+        request->req_packed_size = packed_size;
+     } else {
+         request->req_packed_size = 0;
+     }
 }
 
 
+/**
+ * Test to check if an acknowledgment has been received, with the match.
+ *
+ * @param  request (IN)  Send request.
+ * return                TRUE if an ack/match has been received from peer.
+ */
+static inline bool mca_ptl_base_send_request_matched(
+    mca_ptl_base_send_request_t* request)
+{
+    return (NULL != request->req_peer_request.pval);
+}
+
 #endif
 
diff --git a/src/mca/ptl/ptl.h b/src/mca/ptl/ptl.h
index 975e099657..bed9d93dac 100644
--- a/src/mca/ptl/ptl.h
+++ b/src/mca/ptl/ptl.h
@@ -1,9 +1,127 @@
 /*
  * $HEADER$
  */
-/*
+/**
+ * @file
+ *
  * P2P Transport Layer (PTL)
+ *
+ * An MCA component type that allows the PML (mca_pml_t) to support a variety of 
+ * network transports concurrently. The PTL layer is responsible for the reliable 
+ * delivery of message fragments, while the assignment and scheduling of fragments 
+ * to PTLs is handled by the upper layer.
+ *
+ * PTL Initialization:
+ *
+ * During library initialization, all available PTL modules are loaded and opened 
+ * via their mca_base_open_module_fn_t function. If possible, the module should
+ * load and open regardless of wether the transport is available. This allows
+ * parameters used by the module to be reported on by tools such as XXXinfo.
+ * 
+ * The mca_ptl_base_module_init_fn_t() is then called for each of the modules that
+ * are succesfully opened. The module init function may return either:
+ *
+ * (1) a NULL list of PTL instances if the transport is not available,
+ * (2) a list containing a single PTL instance, where the PTL provides
+ *     a layer of abstraction over multiple physical devices (e.g. NICs),
+ * (3) a list containing multiple PTL instances where each PTL instance
+ *     corresponds to a single physical device.
+ * 
+ * If multiple network devices are available for a given transport, the preferred
+ * approach is (3) above. In this case, the PML layer will handle scheduling 
+ * across the available resources, and fail-over in the event of a PTL failure.
+ * If the second approach is used, and a single PTL instance abstracts multiple 
+ * physical devices, the PTL assumes all responsibility for scheduling/failover
+ * within those devices.
+ *
+ * During module initialization, the module should post any addressing information
+ * required by its peers. An example would be the TCP listen port opened by the
+ * TCP module for incoming connection requests. This information is published 
+ * to peers via the mca_base_modex_send() interface. Note that peer information 
+ * will not be available via mca_base_modex_recv() during the modules init
+ * function. However, it is guaranteed to be available during PTL selection.
+ *
+ * PTL Selection:
+ *
+ * The PML maintains a list of available PTL instances, sorted by their exclusivity 
+ * ranking. This is a relative ranking that is used to select the set of PTLs that 
+ * may be used to reach a given destination.  The PTL modules are queried via their 
+ * mca_ptl_base_add_proc_fn_t() to determine if they are able to reach a given destination. 
+ * The first PTL module that returns success is selected. Subsequent PTL modules are 
+ * queried only if they are at the same exclusivity ranking.
+ * 
+ * An example of how this might be used:
+ *
+ * PTL         Exclusivity   Comments
+ * --------    -----------   ------------------
+ * LO              100       Selected exclusively for local process
+ * SM               50       Selected exclusively for other processes on host
+ * IB                0       Selected based on network reachability
+ * IB                0       Selected based on network reachability
+ * TCP               0       Selected based on network reachability
+ * TCP               0       Selected based on network reachability
+ *
+ * When a PTL module is selected, it may choose to optionally return a pointer to an
+ * an mca_ptl_base_peer_t data structure to the PML. This pointer is cached by the PML
+ * and returned to the PTL on subsequent data transfer calls to this specific process.
+ * The actual contents of the data structure are defined on a per PTL basis, and are
+ * typically used to cache addressing or connection information, such as the TCP socket
+ * used by the TCP PTL. 
+ *
+ * Send Path:
+ *
+ * When multiple PTLs are available to reach a given destination, a single request 
+ * (that is large enough) will be split across the available PTLs. The PML scheduler 
+ * will determine the PTL to use for the first fragment based on the relative latency
+ * ranking exported by the PTLs. 
+ *
+ * To minimize latency, and allow for derived types of the mca_pml_base_send_request_t, 
+ * the PML will call the selected PTLs ptl_request_alloc() method to allocate the send 
+ * request descriptor. The PTL should return a derived type of mca_pml_base_send_request_t, 
+ * that contains space for both the base send request and initial fragment descriptor
+ * (mca_ptl_base_send_frag_t or derived type).  This approach allows all of the descriptors
+ * required to initiate the send to be allocated from a free list in a single operation.
+ *
+ * When the request is started, the PML will call the selected PTL's ptl_send() method
+ * with up to ptl_first_frag_size bytes of the request. The PTL should attempt to deliver
+ * up to the requested number of bytes. The number of bytes actually fragmented and queued
+ * for delivery must be updated on the send request to reflect the current offset into the
+ * send buffer.
+ *
+ * If the request is larger than ptl_first_frag_size, the remainder of the request
+ * will be scheduled across potentially multiple PTLs, upon an acknowledgment from 
+ * the peer that the request has been matched on the receive side. The PTL must defer
+ * calling ptl_send_progress() on the initial fragment until an acknowledment is received,
+ * as this signals to the PML that the remaining fragments may be scheduled. For further
+ * detail on the scheduling algorithm, refer to the PML (mca_pml_t) documentation.
+ *
+ * As subsequent fragments are completed by the PTLs, the ptl_send_progress() method should 
+ * be called to update the status of the send request. Note that each PTL is responsible
+ * for managing the resources associated with send fragments and their allocation from and
+ * return to internal caches/free lists.
+ * 
+ * Recv Path:
+ *
+ * The first fragment of a message is sent with a header type of mca_ptl_base_match_header_t.
+ * When a header of this type is received, to minimize latency the PTL should call the 
+ * ptl_match() method as soon as entire header is available, potentially prior to receiving
+ * any data associated with the first fragment. If a match is made, the PML will call
+ * the ptl_recv() method of the fragments PTL. 
+ *
+ * The ptl_recv() method should generate, if required, an ack to the source process. An
+ * ack is required if the MCA_PTL_FLAGS_ACK_MATCHED bit is set by the source in the initial 
+ * message header.  The ack should contain a pointer to the matched request, along 
+ * with the pointer to the orignal send fragment contained in the initial message header. 
+ *
+ * On receipt of the ack, the source will schedule any remaining fragments. The selected PTLs 
+ * should generate the remaining fragments with an mca_ptl_base_frag_header_t, which contains
+ * a placeholder for a pointer to the matched receive request. This allows the receiver to
+ * avoid calling the matching logic for subsequent fragments. As fragments are completed,
+ * each PTL calls their ptl_recv_progress() method to update the PML with the request
+ * status.
+ *
  */
+
 #ifndef MCA_PTL_H
 #define MCA_PTL_H
 
@@ -13,7 +131,6 @@
 #include "proc/proc.h"
 #include "mca/pml/pml.h"
 
-
 /*
  * PTL types
  */
@@ -36,9 +153,10 @@ typedef lam_list_t mca_ptl_base_queue_t;
  */
 
 /**
- *  MCA->PTL Intializes the PTL module and creates specific PTL instance(s).
+ * MCA->PTL Intializes the PTL module and creates specific PTL instance(s).
  *
- *  @param num_ptls (OUT) Returns the number of ptl instances created.
+ * @param num_ptls (OUT) Returns the number of ptl instances created, or 0
+ *                       if the transport is not available.
  *
  * @param allow_multi_user_threads (OUT) Whether this module can run
  * at MPI_THREAD_MULTIPLE or not.
@@ -46,7 +164,14 @@ typedef lam_list_t mca_ptl_base_queue_t;
  * @param have_hidden_threads (OUT) Whether this module may use
  * hidden threads (e.g., progress threads) or not.
  *
- *  @return Array of pointers to PTL instances.
+ * @return Array of pointers to PTL instances, or NULL if the transport  
+ *         is not available.
+ *
+ * During module initialization, the PTL module should discover the physical
+ * devices that are available for the given transport, and a PTL instance 
+ * created to represent each available device. Any addressing information
+ * required by peers to reach the available devices should be published during
+ * the module init via the mca_base_modex_send() interface.
  */
 typedef struct mca_ptl_t** (*mca_ptl_base_module_init_fn_t)(
     int *num_ptls, 
@@ -56,53 +181,71 @@ typedef struct mca_ptl_t** (*mca_ptl_base_module_init_fn_t)(
 
                                                                                                             
 /**
- *  PML->PTL Progresses outstanding requests in each PTL module.
- *
- *  @param timstamp (IN)     The current time - used for retransmisstion timers.
- */
-typedef void (*mca_ptl_base_module_progress_fn_t)(
-    mca_ptl_base_tstamp_t timestamp
-);
-
-/**
- *  PTL module version and interface functions.
+ *  PTL module descriptor. Contains module version information
+ *  and module open/close/init functions.
  */
 
 struct mca_ptl_base_module_1_0_0_t {
   mca_base_module_t ptlm_version;
   mca_base_module_data_1_0_0_t ptlm_data;
   mca_ptl_base_module_init_fn_t ptlm_init;
-  mca_ptl_base_module_progress_fn_t ptlm_progress;
 };
 typedef struct mca_ptl_base_module_1_0_0_t mca_ptl_base_module_1_0_0_t;
 typedef struct mca_ptl_base_module_1_0_0_t mca_ptl_base_module_t;
 
 
 /*
- *  PTL instance interface functions and datatype.
+ * PTL instance interface functions and datatype.
  */
 
+
 /**
- *  PML->PTL notification of change in the process list.
+ * MCA->PTL Clean up any resources held by PTL instance before the 
+ * module is unloaded.
+ *  
+ * @param ptl (IN)   PTL instance.
  *
- *  @param ptl (IN)     
- *  @param procs (IN)   
- *  @param nprocs (IN)  
- *  @return            
+ * Prior to unloading a PTL module, the MCA framework will call the PTL
+ * finalize method for each PTL instance.
+ * 
+ */
+typedef int (*mca_ptl_base_finalize_fn_t)(
+    struct mca_ptl_t* ptl
+);
+                                                                                                         
+/**
+ * PML->PTL notification of change in the process list. 
+ *
+ * @param ptl (IN)     PTL instance
+ * @param proc (IN)    Peer process
+ * @param peer (OUT)   Peer addressing information.
+ * @return             Status indicates wether PTL is reachable.
+ *
+ * The mca_ptl_base_add_proc_fn_t() is called by the PML to determine
+ * the set of PTLs that should be used to reach the specified process.
+ * A return value of LAM_SUCCESS indicates the PTL should be added to the
+ * set used to reach the proc. The peers addressing information may be 
+ * obtained by the PTL via the mca_base_modex_recv() function if required. 
+ * The PTL may optionally return a pointer to a mca_ptl_base_peer_t data 
+ * structure, to cache peer addressing or connection information.
  */
 typedef int (*mca_ptl_base_add_proc_fn_t)(
     struct mca_ptl_t* ptl, 
     struct lam_proc_t* proc, 
-    struct mca_ptl_base_peer_t**
+    struct mca_ptl_base_peer_t** peer
 );
 
 /**
- *  PML->PTL notification of change in the process list.
+ * PML->PTL notification of change in the process list.
  *
- *  @param ptl (IN)     
- *  @param procs (IN)   
- *  @param nprocs (IN)  
- *  @return            
+ * @param ptl (IN)     PTL instance
+ * @param proc (IN)    Peer process
+ * @param peer (IN)    Peer addressing information.
+ * @return             Status indicating if cleanup was successful
+ *
+ * If the process list shrinks, the PML will notify the PTL of the
+ * change. Peer addressing information cached by the PML is provided
+ * for cleanup by the PTL.
  */
 typedef int (*mca_ptl_base_del_proc_fn_t)(
     struct mca_ptl_t* ptl, 
@@ -111,29 +254,59 @@ typedef int (*mca_ptl_base_del_proc_fn_t)(
 );
 
 /**
- *  MCA->PTL Clean up any resources held by PTL instance before the module is unloaded.
- *  
- *  @param ptl (IN)   The PTL module instance that is being unloaded.
+ * PML->PTL Allocate a send request from the PTL modules free list.
+ *
+ * @param ptl (IN)       PTL instance
+ * @param request (OUT)  Pointer to allocated request.
+ * @return               Status indicating if allocation was successful.
+ *
+ * To reduce latency (number of required allocations), a derived
+ * type of mca_ptl_base_send_request_t is obtained from the PTL that
+ * is selected to send the first fragment. The derived type should contain
+ * space for the base request structure, the PTL first fragment,
+ * and any other required buffer space.
  */
-typedef int (*mca_ptl_base_finalize_fn_t)(
-    struct mca_ptl_t* ptl
-);
-                                                                                                         
 typedef int (*mca_ptl_base_request_alloc_fn_t)(
     struct mca_ptl_t* ptl, 
     struct mca_ptl_base_send_request_t** request
 );
 
+/**
+ * PML->PTL Return a send request to the PTL modules free list.
+ *
+ * @param ptl (IN)       PTL instance
+ * @param request (IN)   Pointer to allocated request.
+ *
+ * Called when the request has been completed at both the MPI
+ * and PML layers.
+ */
 typedef void (*mca_ptl_base_request_return_fn_t)(
     struct mca_ptl_t* ptl, 
     struct mca_ptl_base_send_request_t* request
 );
 
-typedef void (*mca_ptl_base_frag_return_fn_t)(
-    struct mca_ptl_t* ptl, 
-    struct mca_ptl_base_recv_frag_t* frag
-);
-
+/**
+ * PML->PTL Initiate a send of the specified size.
+ *
+ * @param ptl (IN)               PTL instance
+ * @param ptl_base_peer (IN)     PTL peer addressing
+ * @param send_request (IN/OUT)  Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t)
+ * @param size (IN)              Number of bytes PML is requesting PTL to deliver
+ * @param flags (IN)             Flags that should be passed to the peer via the message header.
+ * @param request (OUT)          LAM_SUCCESS if the PTL was able to queue one or more fragments
+ *
+ * When multiple PTLs are available, a single request (that is large enough)
+ * will be split across the available PTLs. The PML scheduler will determine
+ * the percentage given to a PTL based on the bandwidth provided by the transport
+ * and its current resource usage. The size parameter to the send function indicates 
+ * the number of bytes the PML is requesting the PTL to send. The PTL may choose
+ * to send 0->size bytes based on available resources. 
+ *
+ * The current offset into the users buffer is passed into the send function
+ * via the req_offset member of the send request parameter. The send function
+ * must update req_offset with the actual number of bytes the PTL is able to
+ * fragment for delivery.
+ */
 typedef int (*mca_ptl_base_send_fn_t)(
     struct mca_ptl_t* ptl, 
     struct mca_ptl_base_peer_t* ptl_base_peer, 
@@ -142,36 +315,77 @@ typedef int (*mca_ptl_base_send_fn_t)(
     int flags
 );
 
+/**
+ * PML->PTL Notification that a receive fragment has been matched.
+ *
+ * @param ptl (IN)          PTL instance
+ * @param recv_frag (IN)    Receive fragment
+ *
+ * A fragment may be matched either when a new receive is posted,
+ * or on receipt of a fragment from the network. In either case,
+ * the PML will downcall into the PTL to provide a notification 
+ * that the match was made.
+ */
 typedef void (*mca_ptl_base_recv_fn_t)(
     struct mca_ptl_t* ptl, 
-    struct mca_ptl_base_recv_frag_t* recv_frag,
-    struct lam_status_public_t* recv_status
+    struct mca_ptl_base_recv_frag_t* recv_frag
 );
 
+/**
+ * PTL->PML Notification from the PTL to the PML that a new fragment
+ * has arrived and can be matched against posted receives.
+ *
+ * @param ptl (IN)       PTL instance
+ * @param recv_frag      Receive fragment
+ * @param header (IN)    Message header
+ *
+ * A fragment may be matched either when a new receive is posted,
+ * or on receipt of a fragment from the network. In either case,
+ * the PML will downcall into the PTL to provide a notification 
+ * that the match was made.
+ *
+ * The message header used for matching is not required to be
+ * contained within the receive fragment. However, if the match is 
+ * not made, the matching code will copy the supplied header into the 
+ * recv fragment so that the match can be made when the receive is posted.
+ */
 typedef int (*mca_ptl_base_match_fn_t)(
-    struct mca_ptl_base_recv_frag_t* frag,
+    struct mca_ptl_base_recv_frag_t* recv_frag,
     struct mca_ptl_base_match_header_t* header
 );
 
+/**
+ * PTL->PML Notification from the PTL to the PML that a fragment
+ * has completed (e.g. been successfully delivered into users buffer)
+ *
+ * @param recv_request (IN)   Receive Request
+ * @param recv_frag (IN)      Receive Fragment
+ */
 typedef void (*mca_ptl_base_recv_progress_fn_t)(
     struct mca_ptl_base_recv_request_t* recv_request,
     struct mca_ptl_base_recv_frag_t* recv_frag
 );
 
+/**
+ * PTL->PML Notification from the PTL to the PML that a fragment
+ * has completed (e.g. been successfully delivered to peer)
+ *
+ * @param send_request (IN)   Send Request
+ * @param send_frag (IN)      Send Fragment
+ */
 typedef void (*mca_ptl_base_send_progress_fn_t)(
     struct mca_ptl_base_send_request_t* send_request,
     struct mca_ptl_base_send_frag_t* send_frag
 );
 
 /**
- * PTL instance interface functions and common state.
+ * PTL instance interface functions and attributes.
  */
-
 struct mca_ptl_t {
 
     /* PTL common attributes */
-    mca_ptl_base_module_t* ptl_module;
-    size_t      ptl_first_frag_size;   /**< maximum size of first fragment */
+    mca_ptl_base_module_t* ptl_module; /**< pointer back to the PTL module structure */
+    size_t      ptl_first_frag_size;   /**< maximum size of first fragment -- eager send */
     size_t      ptl_min_frag_size;     /**< threshold below which the PTL will not fragment */
     size_t      ptl_max_frag_size;     /**< maximum fragment size supported by the PTL */
     uint32_t    ptl_exclusivity;       /**< indicates this PTL should be used exclusively */
@@ -186,7 +400,6 @@ struct mca_ptl_t {
     mca_ptl_base_recv_fn_t             ptl_recv;
     mca_ptl_base_request_alloc_fn_t    ptl_request_alloc;
     mca_ptl_base_request_return_fn_t   ptl_request_return;
-    mca_ptl_base_frag_return_fn_t      ptl_frag_return;
 
     /* PTL->PML function table - filled in by PML at init */
     mca_ptl_base_match_fn_t            ptl_match;
diff --git a/src/mca/ptl/tcp/src/ptl_tcp.c b/src/mca/ptl/tcp/src/ptl_tcp.c
index 4a0bd845b3..df3fc8f344 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp.c
+++ b/src/mca/ptl/tcp/src/ptl_tcp.c
@@ -2,6 +2,7 @@
  * $HEADER$
  */
 
+#include <string.h>
 #include "util/output.h"
 #include "util/if.h"
 #include "mca/pml/pml.h"
@@ -35,28 +36,11 @@ mca_ptl_tcp_t mca_ptl_tcp = {
     mca_ptl_tcp_send,
     mca_ptl_tcp_recv,
     mca_ptl_tcp_request_alloc,
-    mca_ptl_tcp_request_return,
-    (mca_ptl_base_frag_return_fn_t)mca_ptl_tcp_recv_frag_return
+    mca_ptl_tcp_request_return
     }
 };
 
 
-int mca_ptl_tcp_create(int if_index)
-{
-    mca_ptl_tcp_t* ptl = malloc(sizeof(mca_ptl_tcp_t));
-    if(NULL == ptl)
-        return LAM_ERR_OUT_OF_RESOURCE;
-    memcpy(ptl, &mca_ptl_tcp, sizeof(mca_ptl_tcp));
-    mca_ptl_tcp_module.tcp_ptls[mca_ptl_tcp_module.tcp_num_ptls++] = ptl;
-
-    /* initialize the ptl */
-    ptl->ptl_ifindex = if_index;
-    lam_ifindextoaddr(if_index, (struct sockaddr*)&ptl->ptl_ifaddr, sizeof(ptl->ptl_ifaddr));
-    lam_ifindextomask(if_index, (struct sockaddr*)&ptl->ptl_ifmask, sizeof(ptl->ptl_ifmask));
-    return LAM_SUCCESS;
-}
-
-
 int mca_ptl_tcp_add_proc(struct mca_ptl_t* ptl, struct lam_proc_t *lam_proc, struct mca_ptl_base_peer_t** peer_ret)
 {
     mca_ptl_tcp_proc_t* ptl_proc = mca_ptl_tcp_proc_create(lam_proc);
@@ -130,8 +114,7 @@ void mca_ptl_tcp_request_return(struct mca_ptl_t* ptl, struct mca_ptl_base_send_
 
 void mca_ptl_tcp_recv_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_recv_frag_t* frag)
 {
-    if(frag->frag_buff != NULL && frag->frag_buff != frag->super.super.frag_addr)
-        free(frag->frag_buff);
+    /* FIX - need to cleanup convertor */
     lam_free_list_return(&mca_ptl_tcp_module.tcp_recv_frags, (lam_list_item_t*)frag);
 }
 
@@ -139,8 +122,9 @@ void mca_ptl_tcp_recv_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_recv
 void mca_ptl_tcp_send_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_send_frag_t* frag)
 {
     if(lam_list_get_size(&mca_ptl_tcp_module.tcp_pending_acks)) {
-        mca_ptl_tcp_recv_frag_t* pending = (mca_ptl_tcp_recv_frag_t*)
-            lam_list_remove_first(&mca_ptl_tcp_module.tcp_pending_acks);
+        mca_ptl_tcp_recv_frag_t* pending;
+        THREAD_LOCK(&mca_ptl_tcp_module.tcp_lock);
+        pending = (mca_ptl_tcp_recv_frag_t*)lam_list_remove_first(&mca_ptl_tcp_module.tcp_pending_acks);
         THREAD_LOCK(&mca_ptl_tcp_module.tcp_lock);
         if(NULL == pending) {
             THREAD_UNLOCK(&mca_ptl_tcp_module.tcp_lock);
@@ -156,6 +140,12 @@ void mca_ptl_tcp_send_frag_return(struct mca_ptl_t* ptl, struct mca_ptl_tcp_send
     }
 }
 
+/*
+ *  Initiate a send. If this is the first fragment, use the fragment
+ *  descriptor allocated with the send requests, otherwise obtain
+ *  one from the free list. Initialize the fragment and foward
+ *  on to the peer.
+ */
 
 int mca_ptl_tcp_send(
     struct mca_ptl_t* ptl,
@@ -178,20 +168,17 @@ int mca_ptl_tcp_send(
 }
 
 
+/*
+ *  A posted receive has been matched - if required send an
+ *  ack back to the peer and process the fragment.
+ */
+
 void mca_ptl_tcp_recv(
     mca_ptl_t* ptl,
-    mca_ptl_base_recv_frag_t* frag,
-    lam_status_public_t* status)
+    mca_ptl_base_recv_frag_t* frag)
 {
+    /* send ack back to peer? */
     mca_ptl_base_header_t* header = &frag->super.frag_header;
-
-    /* fill in match */
-    status->MPI_SOURCE = header->hdr_match.hdr_src;
-    status->MPI_TAG = header->hdr_match.hdr_tag;
-    status->MPI_ERROR = LAM_SUCCESS;
-    status->_count = header->hdr_match.hdr_msg_length;
-
-    /* send ack back to peer */
     if(header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK_MATCHED) {
         int rc;
         mca_ptl_tcp_send_frag_t* ack = mca_ptl_tcp_send_frag_alloc(&rc);
diff --git a/src/mca/ptl/tcp/src/ptl_tcp.h b/src/mca/ptl/tcp/src/ptl_tcp.h
index 787ab122bd..2c61e953dc 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp.h
+++ b/src/mca/ptl/tcp/src/ptl_tcp.h
@@ -1,8 +1,9 @@
-/* @file
- *
+/*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_TCP_H
 #define MCA_PTL_TCP_H
 
@@ -15,31 +16,30 @@
 #include "mca/ptl/ptl.h"
 
 
-/*
+/**
  * TCP PTL module.
  */
-
 struct mca_ptl_tcp_module_1_0_0_t {
-    mca_ptl_base_module_1_0_0_t super;
-    struct mca_ptl_tcp_t** tcp_ptls;
-    size_t tcp_num_ptls;         /**< number of ptls actually used */
-    size_t tcp_max_ptls;         /**< maximum number of ptls - available kernel ifs */
-    int tcp_listen_sd;
-    unsigned short tcp_listen_port;
-    char* tcp_if_include;        /**< comma seperated list of interface to include */
-    char* tcp_if_exclude;        /**< comma seperated list of interface to exclude */
-    int   tcp_free_list_num;     /**< initial size of free lists */
-    int   tcp_free_list_max;     /**< maximum size of free lists */
-    int   tcp_free_list_inc;     /**< number of elements to alloc when growing free lists */
-    lam_free_list_t tcp_send_requests;
-    lam_free_list_t tcp_send_frags;
-    lam_free_list_t tcp_recv_frags;
-    lam_list_t tcp_procs;
-    lam_list_t tcp_pending_acks;
-    struct mca_ptl_tcp_proc_t* tcp_local;
-    lam_event_t tcp_send_event;
-    lam_event_t tcp_recv_event;
-    lam_mutex_t tcp_lock;
+    mca_ptl_base_module_1_0_0_t super;    /**< base PTL module */
+    struct mca_ptl_tcp_t** tcp_ptls;      /**< array of available PTLs */
+    size_t tcp_num_ptls;                  /**< number of ptls actually used */
+    size_t tcp_max_ptls;                  /**< maximum number of ptls - available kernel ifs */
+    int tcp_listen_sd;                    /**< listen socket for incoming connection requests */
+    unsigned short tcp_listen_port;       /**< listen port */
+    char* tcp_if_include;                 /**< comma seperated list of interface to include */
+    char* tcp_if_exclude;                 /**< comma seperated list of interface to exclude */
+    int   tcp_free_list_num;              /**< initial size of free lists */
+    int   tcp_free_list_max;              /**< maximum size of free lists */
+    int   tcp_free_list_inc;              /**< number of elements to alloc when growing free lists */
+    lam_free_list_t tcp_send_requests;    /**< free list of tcp send requests -- sendreq + sendfrag */
+    lam_free_list_t tcp_send_frags;       /**< free list of tcp send fragments */
+    lam_free_list_t tcp_recv_frags;       /**< free list of tcp recv fragments */
+    lam_list_t tcp_procs;                 /**< list of tcp proc structures */
+    lam_list_t tcp_pending_acks;          /**< list of pending acks - retry as sends complete */
+    struct mca_ptl_tcp_proc_t* tcp_local; /**< the tcp proc instance corresponding to the local process */
+    lam_event_t tcp_send_event;           /**< event structure for sends */
+    lam_event_t tcp_recv_event;           /**< event structure for recvs */
+    lam_mutex_t tcp_lock;                 /**< lock for accessing module state */
 };
 typedef struct mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module_1_0_0_t;
 typedef struct mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module_t;
@@ -48,76 +48,140 @@ struct mca_ptl_tcp_send_frag_t;
 
 extern mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module;
 
+/**
+ * Register TCP module parameters with the MCA framework
+ */
 extern int mca_ptl_tcp_module_open(void);
+
+/**
+ * Any final cleanup before being unloaded.
+ */
 extern int mca_ptl_tcp_module_close(void);
 
+/**
+ * TCP module initialization.
+ * 
+ * @param num_ptls (OUT)                  Number of PTLs returned in PTL array.
+ * @param allow_multi_user_threads (OUT)  Flag indicating wether PTL supports user threads (TRUE)
+ * @param have_hidden_threads (OUT)       Flag indicating wether PTL uses threads (TRUE)
+ *
+ *  (1) read interface list from kernel and compare against module parameters
+ *      then create a PTL instance for selected interfaces
+ *  (2) setup TCP listen socket for incoming connection attempts
+ *  (3) publish PTL addressing info 
+ *
+ */
 extern mca_ptl_t** mca_ptl_tcp_module_init(
     int *num_ptls, 
     bool *allow_multi_user_threads,
     bool *have_hidden_threads
 );
 
-extern void mca_ptl_tcp_module_progress(
-    mca_ptl_base_tstamp_t tstamp
-);
-
-
 /**
  * TCP PTL Interface
- *
  */
-
 struct mca_ptl_tcp_t {
-    mca_ptl_t          super; 
-    int                ptl_ifindex;
-    struct sockaddr_in ptl_ifaddr;
-    struct sockaddr_in ptl_ifmask;
+    mca_ptl_t          super;       /**< base PTL interface */
+    int                ptl_ifindex; /**< PTL interface index */
+    struct sockaddr_in ptl_ifaddr;  /**< PTL interface address */
+    struct sockaddr_in ptl_ifmask;  /**< PTL interface netmask */
 };
 typedef struct mca_ptl_tcp_t mca_ptl_tcp_t;
 
 extern mca_ptl_tcp_t mca_ptl_tcp;
 
 
-extern int mca_ptl_tcp_create(
-    int if_index
-);
+/**
+ * Cleanup any resources held by the PTL.
+ * 
+ * @param ptl  PTL instance.
+ * @return     LAM_SUCCESS or error status on failure.
+ */
 
 extern int mca_ptl_tcp_finalize(
     struct mca_ptl_t* ptl
 );
 
+
+/**
+ * PML->PTL notification of change in the process list.
+ * 
+ * @param ptl (IN)
+ * @param proc (IN)  
+ * @param peer (OUT)
+ * @return     LAM_SUCCESS or error status on failure.
+ * 
+ */
+
 extern int mca_ptl_tcp_add_proc(
     struct mca_ptl_t* ptl,
-    struct lam_proc_t *procs,
-    struct mca_ptl_base_peer_t** addr
+    struct lam_proc_t *proc,
+    struct mca_ptl_base_peer_t** peer
 );
 
+                                                                                                               
+/**
+ * PML->PTL notification of change in the process list.
+ *
+ * @param ptl (IN)     PTL instance
+ * @param proc (IN)    Peer process
+ * @param peer (IN)    Peer addressing information.
+ * @return             Status indicating if cleanup was successful
+ *
+ */
 extern int mca_ptl_tcp_del_proc(
     struct mca_ptl_t* ptl,
     struct lam_proc_t *procs,
     struct mca_ptl_base_peer_t* addr
 );
 
+/**
+ * PML->PTL Allocate a send request from the PTL modules free list.
+ *
+ * @param ptl (IN)       PTL instance
+ * @param request (OUT)  Pointer to allocated request.
+ * @return               Status indicating if allocation was successful.
+ *
+ */
 extern int mca_ptl_tcp_request_alloc(
     struct mca_ptl_t* ptl,
     struct mca_ptl_base_send_request_t**
 );
 
+/**
+ * PML->PTL Return a send request to the PTL modules free list.
+ *
+ * @param ptl (IN)       PTL instance
+ * @param request (IN)   Pointer to allocated request.
+ *
+ */
 extern void mca_ptl_tcp_request_return(
     struct mca_ptl_t* ptl,
     struct mca_ptl_base_send_request_t*
 );
 
-extern void mca_ptl_tcp_recv_frag_return(
+/**
+ * PML->PTL Notification that a receive fragment has been matched.
+ *
+ * @param ptl (IN)          PTL instance
+ * @param recv_frag (IN)    Receive fragment
+ *
+ */
+extern void mca_ptl_tcp_recv(
     struct mca_ptl_t* ptl,
-    struct mca_ptl_tcp_recv_frag_t*
+    struct mca_ptl_base_recv_frag_t* frag
 );
-
-extern void mca_ptl_tcp_send_frag_return(
-    struct mca_ptl_t* ptl,
-    struct mca_ptl_tcp_send_frag_t*
-);
-
+                                                                                                 
+/**
+ * PML->PTL Initiate a send of the specified size.
+ *
+ * @param ptl (IN)               PTL instance
+ * @param ptl_base_peer (IN)     PTL peer addressing
+ * @param send_request (IN/OUT)  Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t)
+ * @param size (IN)              Number of bytes PML is requesting PTL to deliver
+ * @param flags (IN)             Flags that should be passed to the peer via the message header.
+ * @param request (OUT)          LAM_SUCCESS if the PTL was able to queue one or more fragments
+ */
 extern int mca_ptl_tcp_send(
     struct mca_ptl_t* ptl,
     struct mca_ptl_base_peer_t* ptl_peer,
@@ -125,13 +189,32 @@ extern int mca_ptl_tcp_send(
     size_t size,
     int flags
 );
-                                                                                                 
-extern void mca_ptl_tcp_recv(
+
+/**
+ * Return a recv fragment to the modules free list.
+ *
+ * @param ptl (IN)   PTL instance
+ * @param frag (IN)  TCP receive fragment
+ *
+ */
+extern void mca_ptl_tcp_recv_frag_return(
     struct mca_ptl_t* ptl,
-    struct mca_ptl_base_recv_frag_t* frag,
-    struct lam_status_public_t* status
+    struct mca_ptl_tcp_recv_frag_t* frag
+);
+
+
+
+/**
+ * Return a send fragment to the modules free list.
+ *
+ * @param ptl (IN)   PTL instance
+ * @param frag (IN)  TCP send fragment
+ *
+ */
+extern void mca_ptl_tcp_send_frag_return(
+    struct mca_ptl_t* ptl,
+    struct mca_ptl_tcp_send_frag_t*
 );
-                                                                                                 
 
 #endif
 
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_addr.h b/src/mca/ptl/tcp/src/ptl_tcp_addr.h
index 88b541bc8e..c9ffe9abd6 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_addr.h
+++ b/src/mca/ptl/tcp/src/ptl_tcp_addr.h
@@ -1,8 +1,9 @@
-/* @file
- *
+/*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_TCP_ADDR_H
 #define MCA_PTL_TCP_ADDR_H
 
@@ -11,10 +12,13 @@
 #include <netinet/in.h>
 
 
+/**
+ * Structure used to publish TCP connection information to peers.
+ */
 struct mca_ptl_tcp_addr_t {
-    struct in_addr addr_inet;
-    in_port_t      addr_port;
-    unsigned short addr_inuse;
+    struct in_addr addr_inet;  /**< IPv4 address in network byte order */
+    in_port_t      addr_port;  /**< listen port */
+    unsigned short addr_inuse; /**< local meaning only */
 };
 typedef struct mca_ptl_tcp_addr_t mca_ptl_tcp_addr_t;
 
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_module.c b/src/mca/ptl/tcp/src/ptl_tcp_module.c
index 1deca40244..a44e46f161 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_module.c
+++ b/src/mca/ptl/tcp/src/ptl_tcp_module.c
@@ -3,6 +3,7 @@
  */
 #include <errno.h>
 #include <unistd.h>
+#include <string.h>
 #include <fcntl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -54,8 +55,7 @@ mca_ptl_tcp_module_1_0_0_t mca_ptl_tcp_module = {
     false
     },
 
-    mca_ptl_tcp_module_init,    /* module init */
-    mca_ptl_tcp_module_progress /* module progress */
+    mca_ptl_tcp_module_init   /* module init */
     }
 };
 
@@ -124,7 +124,7 @@ int mca_ptl_tcp_module_open(void)
     mca_ptl_tcp.super.ptl_exclusivity =
         mca_ptl_tcp_param_register_int("exclusivity", 0);
     mca_ptl_tcp.super.ptl_first_frag_size =
-        mca_ptl_tcp_param_register_int("first_frag_size", 16*1024);
+        mca_ptl_tcp_param_register_int("first_frag_size", 64*1024);
     mca_ptl_tcp.super.ptl_min_frag_size = 
         mca_ptl_tcp_param_register_int("min_frag_size", 64*1024);
     mca_ptl_tcp.super.ptl_max_frag_size =
@@ -149,6 +149,27 @@ int mca_ptl_tcp_module_close(void)
 }
 
 
+/*
+ *  Create a ptl instance and add to modules list.
+ */
+
+static int mca_ptl_tcp_create(int if_index)
+{
+    mca_ptl_tcp_t* ptl = malloc(sizeof(mca_ptl_tcp_t));
+    if(NULL == ptl)
+        return LAM_ERR_OUT_OF_RESOURCE;
+    memcpy(ptl, &mca_ptl_tcp, sizeof(mca_ptl_tcp));
+    mca_ptl_tcp_module.tcp_ptls[mca_ptl_tcp_module.tcp_num_ptls++] = ptl;
+                                                                                                                                
+    /* initialize the ptl */
+    ptl->ptl_ifindex = if_index;
+    lam_ifindextoaddr(if_index, (struct sockaddr*)&ptl->ptl_ifaddr, sizeof(ptl->ptl_ifaddr));
+    lam_ifindextomask(if_index, (struct sockaddr*)&ptl->ptl_ifmask, sizeof(ptl->ptl_ifmask));
+    return LAM_SUCCESS;
+}
+                                                                                                                                
+                                                                                                                                
+
 /*
  * Create a TCP PTL instance for either:
  * (1) all interfaces specified by the user
@@ -173,7 +194,7 @@ static int mca_ptl_tcp_module_create_instances(void)
     if(NULL == mca_ptl_tcp_module.tcp_ptls)
         return LAM_ERR_OUT_OF_RESOURCE;
 
-    /* if the user specified an interface list - use these only */
+    /* if the user specified an interface list - use these exclusively */
     argv = include = lam_argv_split(mca_ptl_tcp_module.tcp_if_include,'\'');
     while(argv && *argv) {
         char* if_name = *argv;
@@ -217,8 +238,8 @@ static int mca_ptl_tcp_module_create_instances(void)
 static int mca_ptl_tcp_module_create_listen(void)
 {
     int flags;
-    struct sockaddr_in inaddr;
-    lam_socklen_t addrlen = sizeof(struct sockaddr_in);
+    struct sockaddr_in inaddr; 
+    lam_socklen_t addrlen;
 
     /* create a listen socket for incoming connections */
     mca_ptl_tcp_module.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0);
@@ -239,6 +260,7 @@ static int mca_ptl_tcp_module_create_listen(void)
     }
                                                                                                       
     /* resolve system assignend port */
+    addrlen = sizeof(struct sockaddr_in);
     if(getsockname(mca_ptl_tcp_module.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) {
         lam_output(0, "mca_ptl_tcp_module_init: getsockname() failed with errno=%d", errno);
         return LAM_ERROR;
@@ -281,7 +303,8 @@ static int mca_ptl_tcp_module_create_listen(void)
 
 static int mca_ptl_tcp_module_exchange(void)
 {
-     size_t i, rc;
+     int rc;
+     size_t i;
      size_t size = mca_ptl_tcp_module.tcp_num_ptls * sizeof(mca_ptl_tcp_addr_t);
      mca_ptl_tcp_addr_t *addrs = malloc(size);
      for(i=0; i<mca_ptl_tcp_module.tcp_num_ptls; i++) {
@@ -351,7 +374,7 @@ mca_ptl_t** mca_ptl_tcp_module_init(int *num_ptls,
     if(mca_ptl_tcp_module_create_listen() != LAM_SUCCESS)
         return 0;
 
-    /* register TCP parameters with the MCA framework */
+    /* publish TCP parameters with the MCA framework */
     if(mca_ptl_tcp_module_exchange() != LAM_SUCCESS)
         return 0;
 
@@ -364,19 +387,6 @@ mca_ptl_t** mca_ptl_tcp_module_init(int *num_ptls,
     return ptls;
 }
 
-
-/*
- *  All TCP progress is handled via an event loop based on select. Events
- *  are dispatched to the appropriate callbacks as file descriptors become
- *  available for read/write.
- */
-
-void mca_ptl_tcp_module_progress(mca_ptl_base_tstamp_t tstamp)
-{
-    lam_event_loop(LAM_EVLOOP_NONBLOCK);
-}
-
-                                                                                                                
 /*
  *  Called by mca_ptl_tcp_module_recv() when the TCP listen
  *  socket has pending connection requests. Accept incoming
@@ -388,8 +398,8 @@ static void mca_ptl_tcp_module_accept(void)
     while(true) {
         lam_socklen_t addrlen = sizeof(struct sockaddr_in);
         struct sockaddr_in addr;
-        int sd = accept(mca_ptl_tcp_module.tcp_listen_sd, (struct sockaddr*)&addr, &addrlen);
         lam_event_t* event;
+        int sd = accept(mca_ptl_tcp_module.tcp_listen_sd, (struct sockaddr*)&addr, &addrlen);
         if(sd < 0) {
             if(errno == EINTR)
                 continue;
@@ -415,9 +425,9 @@ static void mca_ptl_tcp_module_recv_handler(int sd, short flags, void* user)
     void* guid; 
     uint32_t size;
     struct sockaddr_in addr;
-    lam_socklen_t addr_len = sizeof(addr);
     int retval;
     mca_ptl_tcp_proc_t* ptl_proc;
+    lam_socklen_t addr_len = sizeof(addr);
 
     /* accept new connections on the listen socket */
     if(mca_ptl_tcp_module.tcp_listen_sd == sd) {
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_peer.c b/src/mca/ptl/tcp/src/ptl_tcp_peer.c
index 0934693431..8c4ca18d9b 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_peer.c
+++ b/src/mca/ptl/tcp/src/ptl_tcp_peer.c
@@ -2,6 +2,7 @@
  * $HEADER$
  */
 #include <stdlib.h>
+#include <string.h>
 #include <unistd.h>
 #include <sys/errno.h>
 #include <sys/types.h>
@@ -331,9 +332,9 @@ static int mca_ptl_tcp_peer_recv_connect_ack(mca_ptl_base_peer_t* ptl_peer)
 
 static int mca_ptl_tcp_peer_start_connect(mca_ptl_base_peer_t* ptl_peer)
 {
-    int rc;
-    int flags;
+    int rc,flags;
     struct sockaddr_in peer_addr;
+
     ptl_peer->peer_sd = socket(AF_INET, SOCK_STREAM, 0);
     if (ptl_peer->peer_sd < 0) {
         ptl_peer->peer_retries++;
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_peer.h b/src/mca/ptl/tcp/src/ptl_tcp_peer.h
index 1079cf591f..38aab2b855 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_peer.h
+++ b/src/mca/ptl/tcp/src/ptl_tcp_peer.h
@@ -1,8 +1,9 @@
-/* @file
- *
+/*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_TCP_PEER_H
 #define MCA_PTL_TCP_PEER_H
 
@@ -26,7 +27,6 @@ typedef enum {
 } mca_ptl_tcp_state_t;
     
 
-
 /**
  *  An abstraction that represents a connection to a peer process.
  *  An instance of mca_ptl_base_peer_t is associated w/ each process 
@@ -36,18 +36,18 @@ typedef enum {
                                                                                                                             
 struct mca_ptl_base_peer_t {
     lam_list_item_t            super;
-    struct mca_ptl_tcp_t*      peer_ptl;
-    struct mca_ptl_tcp_proc_t* peer_proc;
-    struct mca_ptl_tcp_addr_t* peer_addr;
-    int                        peer_sd;
-    mca_ptl_tcp_send_frag_t*   peer_send_frag;
-    mca_ptl_tcp_recv_frag_t*   peer_recv_frag;
-    mca_ptl_tcp_state_t        peer_state;
-    size_t                     peer_retries;
-    lam_list_t                 peer_frags;   /* list of pending frags to send */
-    lam_mutex_t                peer_lock;
-    lam_event_t                peer_send_event;
-    lam_event_t                peer_recv_event;
+    struct mca_ptl_tcp_t*      peer_ptl;         /**< PTL instance that created this connection */
+    struct mca_ptl_tcp_proc_t* peer_proc;        /**< proc structure corresponding to peer */
+    struct mca_ptl_tcp_addr_t* peer_addr;        /**< address of peer */
+    int                        peer_sd;          /**< socket connection to peer */
+    mca_ptl_tcp_send_frag_t*   peer_send_frag;   /**< current send frag being processed */
+    mca_ptl_tcp_recv_frag_t*   peer_recv_frag;   /**< current recv frag being processed */
+    mca_ptl_tcp_state_t        peer_state;       /**< current state of the connection */
+    size_t                     peer_retries;     /**< number of connection retries attempted */
+    lam_list_t                 peer_frags;       /**< list of pending frags to send */
+    lam_mutex_t                peer_lock;        /**< lock for concurrent access to peer state */
+    lam_event_t                peer_send_event;  /**< event for async processing of send frags */
+    lam_event_t                peer_recv_event;  /**< event for async processing of recv frags */
 };
 typedef struct mca_ptl_base_peer_t mca_ptl_base_peer_t;
 
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_proc.c b/src/mca/ptl/tcp/src/ptl_tcp_proc.c
index 1b448dc2ce..83b5ee2db3 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_proc.c
+++ b/src/mca/ptl/tcp/src/ptl_tcp_proc.c
@@ -1,6 +1,7 @@
 /*
  * $HEADER$
  */
+#include <string.h>
 #include "atomic.h"
 #include "lfc/lam_hash_table.h"
 #include "mca/base/mca_base_module_exchange.h"
@@ -81,7 +82,7 @@ mca_ptl_tcp_proc_t* mca_ptl_tcp_proc_create(lam_proc_t* lam_proc)
         return 0;
     }
     memcpy(ptl_proc->proc_guid, lam_proc->proc_job, size);
-    memcpy(((char*) ptl_proc->proc_guid) + size, &vpid, sizeof(uint32_t));
+    memcpy(((unsigned char*)ptl_proc->proc_guid)+size, &vpid, sizeof(uint32_t));
 
     /* lookup tcp parameters exported by this proc */
     rc = mca_base_modex_recv(
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_proc.h b/src/mca/ptl/tcp/src/ptl_tcp_proc.h
index f1b2a17ecd..d4bba30689 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_proc.h
+++ b/src/mca/ptl/tcp/src/ptl_tcp_proc.h
@@ -1,8 +1,9 @@
-/* @file
- *
+/* 
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_TCP_PROC_H
 #define MCA_PTL_TCP_PROC_H
 
@@ -19,19 +20,19 @@ extern lam_class_t mca_ptl_tcp_proc_t_class;
 
 /**
  *  Represents the state of a remote process and the set of addresses
- *  that it exports. Also cache an instance or mca_ptl_base_peer_t for each
+ *  that it exports. Also cache an instance of mca_ptl_base_peer_t for each
  *  PTL instance that attempts to open a connection to the process.
  */
 struct mca_ptl_tcp_proc_t {
-    lam_list_item_t super;
-    lam_proc_t *proc_lam;
-    void*  proc_guid; 
-    size_t proc_guid_size;
-    struct mca_ptl_tcp_addr_t *proc_addrs;
-    size_t proc_addr_count;
-    struct mca_ptl_base_peer_t **proc_peers;
-    size_t proc_peer_count;
-    lam_mutex_t proc_lock;
+    lam_list_item_t super;                   /**< allow proc to be placed on a list */
+    lam_proc_t *proc_lam;                    /**< pointer to corresponding lam_proc_t */
+    void*  proc_guid;                        /**< globally unique identifier for the process */
+    size_t proc_guid_size;                   /**< size of the guid */
+    struct mca_ptl_tcp_addr_t *proc_addrs;   /**< array of addresses published by peer */
+    size_t proc_addr_count;                  /**< number of addresses published by peer */
+    struct mca_ptl_base_peer_t **proc_peers; /**< array of peers that have been created to access this proc */
+    size_t proc_peer_count;                  /**< number of peers */
+    lam_mutex_t proc_lock;                   /**< lock to protect against concurrent access to proc state */
 };
 typedef struct mca_ptl_tcp_proc_t mca_ptl_tcp_proc_t;
 
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c b/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c
index 347b12669a..e086ddebc1 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c
+++ b/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.c
@@ -48,8 +48,10 @@ void mca_ptl_tcp_recv_frag_init(mca_ptl_tcp_recv_frag_t* frag, mca_ptl_base_peer
 {
     frag->frag_owner = &peer->peer_ptl->super;
     frag->super.frag_request = 0;
+    frag->super.super.frag_addr = NULL;
+    frag->super.super.frag_size = 0;
+    frag->super.frag_is_buffered = false;
     frag->frag_peer = peer;
-    frag->frag_buff = NULL;
     frag->frag_hdr_cnt = 0;
     frag->frag_msg_cnt = 0;
     frag->frag_ack_pending = false;
@@ -140,15 +142,11 @@ static bool mca_ptl_tcp_recv_frag_match(mca_ptl_tcp_recv_frag_t* frag, int sd)
 
         /* match was not made - so allocate buffer for eager send */
         if (NULL == frag->super.frag_request) {
-
             if(frag->frag_header.hdr_frag.hdr_frag_length > 0) {
-                frag->frag_buff = malloc(frag->frag_header.hdr_frag.hdr_frag_length);
+                frag->super.super.frag_addr = malloc(frag->frag_header.hdr_frag.hdr_frag_length);
                 frag->super.super.frag_size = frag->frag_header.hdr_frag.hdr_frag_length;
+                frag->super.frag_is_buffered = true;
             }
-
-        /* match was made - use application buffer */
-        } else {
-            frag->frag_buff = (unsigned char*)frag->super.super.frag_addr;
         }
     } 
 
@@ -178,7 +176,6 @@ static bool mca_ptl_tcp_recv_frag_frag(mca_ptl_tcp_recv_frag_t* frag, int sd)
     if(frag->frag_msg_cnt == 0) {
         frag->super.frag_request = frag->frag_header.hdr_frag.hdr_dst_ptr.pval;
         mca_ptl_base_recv_frag_init(&frag->super);
-        frag->frag_buff = frag->super.super.frag_addr;
     }
 
     /* continue to receive user data */
@@ -207,7 +204,7 @@ static bool mca_ptl_tcp_recv_frag_data(mca_ptl_tcp_recv_frag_t* frag, int sd)
 {
     int cnt = -1;
     while(cnt < 0) {
-        cnt = recv(sd, (unsigned char*)frag->frag_buff+frag->frag_msg_cnt,  
+        cnt = recv(sd, (unsigned char*)frag->super.super.frag_addr+frag->frag_msg_cnt,  
             frag->super.super.frag_size-frag->frag_msg_cnt, 0);
         if(cnt == 0) {
             mca_ptl_tcp_peer_close(frag->frag_peer);
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h b/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h
index 7c9c7569c3..94eb536329 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h
+++ b/src/mca/ptl/tcp/src/ptl_tcp_recvfrag.h
@@ -1,7 +1,9 @@
-/* @file
- *
+/* 
  * $HEADER$
  */
+/**
+ * @file
+ */
 
 #ifndef MCA_PTL_TCP_RECV_FRAG_H
 #define MCA_PTL_TCP_RECV_FRAG_H
@@ -18,13 +20,14 @@
 extern lam_class_t mca_ptl_tcp_recv_frag_t_class;
 
 
+/**
+ *  TCP received fragment derived type.
+ */
 struct mca_ptl_tcp_recv_frag_t {
-    mca_ptl_base_recv_frag_t super;
-    mca_ptl_base_ack_header_t frag_ack;
-    unsigned char* frag_buff;
-    size_t frag_hdr_cnt;
-    size_t frag_msg_cnt;
-    bool frag_ack_pending;
+    mca_ptl_base_recv_frag_t super; /**< base receive fragment descriptor */
+    size_t frag_hdr_cnt;            /**< number of header bytes received */
+    size_t frag_msg_cnt;            /**< number of msg bytes received */
+    bool frag_ack_pending;          /**< is an ack pending for this fragment */
 };
 typedef struct mca_ptl_tcp_recv_frag_t mca_ptl_tcp_recv_frag_t;
 
@@ -42,8 +45,11 @@ bool mca_ptl_tcp_recv_frag_send_ack(mca_ptl_tcp_recv_frag_t* frag);
 static inline void mca_ptl_tcp_recv_frag_progress(mca_ptl_tcp_recv_frag_t* frag)
 {
     if(frag->frag_msg_cnt >= frag->super.super.frag_header.hdr_frag.hdr_frag_length) {
-        if(frag->frag_buff != frag->super.super.frag_addr) {
-            memcpy(frag->super.super.frag_addr, frag->frag_buff, frag->super.super.frag_size);
+        if(frag->super.frag_is_buffered) {
+            struct iovec iov;
+            iov.iov_base = frag->super.super.frag_addr;
+            iov.iov_len = frag->super.super.frag_size;
+            lam_convertor_unpack(&frag->super.super.frag_convertor, &iov, 1);
         }
         frag->super.super.frag_owner->ptl_recv_progress(frag->super.frag_request, &frag->super);
         if(frag->frag_ack_pending == false) {
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c b/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c
index 2cc8f17318..2a7dfcdf24 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c
+++ b/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.c
@@ -5,14 +5,17 @@
 #include <sys/types.h>
 #include <sys/errno.h>
 #include "types.h"
+#include "datatype/datatype.h"
 #include "mca/ptl/base/ptl_base_sendreq.h"
 #include "ptl_tcp.h"
 #include "ptl_tcp_peer.h"
+#include "ptl_tcp_proc.h"
 #include "ptl_tcp_sendfrag.h"
 
-#define frag_header super.super.frag_header
-#define frag_owner  super.super.frag_owner
-#define frag_peer   super.super.frag_peer
+#define frag_header     super.super.frag_header
+#define frag_owner      super.super.frag_owner
+#define frag_peer       super.super.frag_peer
+#define frag_convertor  super.super.frag_convertor
 
 
 static void mca_ptl_tcp_send_frag_construct(mca_ptl_tcp_send_frag_t* frag);
@@ -42,7 +45,7 @@ static void mca_ptl_tcp_send_frag_destruct(mca_ptl_tcp_send_frag_t* frag)
  *  data buffer, and the indicated size.
  */
 
-void mca_ptl_tcp_send_frag_init(
+int mca_ptl_tcp_send_frag_init(
     mca_ptl_tcp_send_frag_t* sendfrag,
     mca_ptl_base_peer_t* ptl_peer,
     mca_ptl_base_send_request_t* sendreq,
@@ -63,7 +66,7 @@ void mca_ptl_tcp_send_frag_init(
         hdr->hdr_match.hdr_src = sendreq->super.req_comm->c_my_rank;
         hdr->hdr_match.hdr_dst = sendreq->super.req_peer;
         hdr->hdr_match.hdr_tag = sendreq->super.req_tag;
-        hdr->hdr_match.hdr_msg_length = sendreq->super.req_length;
+        hdr->hdr_match.hdr_msg_length = sendreq->req_packed_size;
         hdr->hdr_match.hdr_msg_seq = sendreq->req_msg_seq;
     } else {
         hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
@@ -75,29 +78,61 @@ void mca_ptl_tcp_send_frag_init(
         hdr->hdr_frag.hdr_dst_ptr = sendreq->req_peer_request;
     }
 
-    /* update request */
-    if(sendreq->req_offset + size > sendreq->super.req_length)
-        size = sendreq->super.req_length - sendreq->req_offset;
+    /* initialize convertor */
+    if(size > 0) {
+       lam_convertor_t *convertor;
+       int rc;
+
+        /* first fragment (eager send) and first fragment of long protocol
+         * can use the convertor initialized on the request, remaining fragments
+         * must copy/reinit the convertor as the transfer could be in parallel.
+         */
+        if(sendreq->req_frags < 2) {
+            convertor = &sendreq->req_convertor;
+        } else {
+
+            convertor = &sendfrag->frag_convertor;
+            if((rc = lam_convertor_copy(&sendreq->req_convertor, convertor)) != LAM_SUCCESS)
+                return rc;
+
+            if((rc = lam_convertor_init_for_send( 
+                convertor,
+                0, 
+                sendreq->super.req_datatype,
+                sendreq->super.req_count,
+                sendreq->super.req_addr,
+                sendreq->req_offset)) != LAM_SUCCESS)
+                return rc; 
+        }
+
+        /* if data is contigous convertor will return an offset
+         * into users buffer - otherwise will return an allocated buffer 
+         * that holds the packed data
+         */
+        sendfrag->frag_vec[1].iov_base = NULL;
+        sendfrag->frag_vec[1].iov_len = size;
+        if((rc = lam_convertor_pack(convertor, &sendfrag->frag_vec[1], 1)) != LAM_SUCCESS)
+            return rc;
+
+        /* adjust size and request offset to reflect actual number of bytes packed by convertor */
+        size = sendfrag->frag_vec[1].iov_len;
+        sendreq->req_offset += size;
+    }
     hdr->hdr_frag.hdr_frag_length = size;
-    sendreq->req_offset += size;
     sendreq->req_frags++;
 
     /* fragment state */
     sendfrag->frag_owner = &ptl_peer->peer_ptl->super;
     sendfrag->super.frag_request = sendreq;
-    sendfrag->super.super.frag_addr = ((char*) sendreq->super.req_addr) + hdr->hdr_frag.hdr_frag_offset;
+    sendfrag->super.super.frag_addr = sendfrag->frag_vec[1].iov_base;
     sendfrag->super.super.frag_size = size;
 
     sendfrag->frag_peer = ptl_peer;
     sendfrag->frag_vec_ptr = sendfrag->frag_vec;
+    sendfrag->frag_vec_cnt = (size == 0) ? 1 : 2;
     sendfrag->frag_vec[0].iov_base = (lam_iov_base_ptr_t)hdr;
     sendfrag->frag_vec[0].iov_len = sizeof(mca_ptl_base_header_t);
-    sendfrag->frag_vec_cnt = 1;
-    if(size > 0) {
-        sendfrag->frag_vec[1].iov_base = (lam_iov_base_ptr_t)sendfrag->super.super.frag_addr;
-        sendfrag->frag_vec[1].iov_len = sendfrag->super.super.frag_size;
-        sendfrag->frag_vec_cnt++;
-    }
+    return LAM_SUCCESS;
 }
 
 
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h b/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h
index 275fa8592d..0514fb45cd 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h
+++ b/src/mca/ptl/tcp/src/ptl_tcp_sendfrag.h
@@ -1,8 +1,9 @@
-/* @file
- *
+/*
  * $HEADER$
  */
-
+/**
+ * @file
+ */
 #ifndef MCA_PTL_TCP_SEND_FRAG_H
 #define MCA_PTL_TCP_SEND_FRAG_H
 
@@ -19,11 +20,14 @@ extern lam_class_t mca_ptl_tcp_send_frag_t_class;
 struct mca_ptl_base_peer_t;
 
 
+/**
+ * TCP send fragment derived type.
+ */
 struct mca_ptl_tcp_send_frag_t {
-   mca_ptl_base_send_frag_t super;
-   struct iovec *frag_vec_ptr;
-   size_t frag_vec_cnt;
-   struct iovec frag_vec[2];
+   mca_ptl_base_send_frag_t super;  /**< base send fragment descriptor */
+   struct iovec *frag_vec_ptr;      /**< pointer into iovec array */
+   size_t frag_vec_cnt;             /**< number of iovec structs left to process */
+   struct iovec frag_vec[2];        /**< array of iovecs for send */
 };
 typedef struct mca_ptl_tcp_send_frag_t mca_ptl_tcp_send_frag_t;
 
@@ -36,7 +40,8 @@ static inline mca_ptl_tcp_send_frag_t* mca_ptl_tcp_send_frag_alloc(int* rc)
 
 bool mca_ptl_tcp_send_frag_handler(mca_ptl_tcp_send_frag_t*, int sd);
 
-void mca_ptl_tcp_send_frag_init(
+
+int mca_ptl_tcp_send_frag_init(
     mca_ptl_tcp_send_frag_t*, 
     struct mca_ptl_base_peer_t*, 
     struct mca_ptl_base_send_request_t*, 
diff --git a/src/mca/ptl/tcp/src/ptl_tcp_sendreq.h b/src/mca/ptl/tcp/src/ptl_tcp_sendreq.h
index b2bc15be78..e4216383be 100644
--- a/src/mca/ptl/tcp/src/ptl_tcp_sendreq.h
+++ b/src/mca/ptl/tcp/src/ptl_tcp_sendreq.h
@@ -1,7 +1,9 @@
-/* @file
- *
+/*
  * $HEADER$
  */
+/**
+ * @file
+ */
 
 #ifndef MCA_PTL_TCP_SEND_REQUEST_H
 #define MCA_PTL_TCP_SEND_REQUEST_H
@@ -13,9 +15,14 @@
 #include "mca/ptl/base/ptl_base_sendreq.h"
 #include "ptl_tcp_sendfrag.h"
 
-
 extern lam_class_t mca_ptl_tcp_send_request_t_class;
 
+/**
+ * TCP send request derived type. The send request contains both the
+ * base send request, and space for the first TCP send fragment descriptor.
+ * This avoids the overhead of a second allocation for the initial send 
+ * fragment on every send request.
+ */
 struct mca_ptl_tcp_send_request_t {
    mca_ptl_base_send_request_t super;
    mca_ptl_tcp_send_frag_t req_frag; /* first fragment */
diff --git a/src/mpi/c/bsend.c b/src/mpi/c/bsend.c
index aaf74d07d6..e4cf4e31df 100644
--- a/src/mpi/c/bsend.c
+++ b/src/mpi/c/bsend.c
@@ -17,20 +17,19 @@
 
 int MPI_Bsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
-                                                                                                                        
+   
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -38,11 +37,10 @@ int MPI_Bsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co
         } else if (lam_comm_peer_invalid(comm, dest)) {
             rc = MPI_ERR_RANK;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Bsend");
     }
-                                                                                                                        
-    return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm);
+
+    rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Bsend");
 }
 
diff --git a/src/mpi/c/bsend_init.c b/src/mpi/c/bsend_init.c
index 0374a51184..f806c5a1e8 100644
--- a/src/mpi/c/bsend_init.c
+++ b/src/mpi/c/bsend_init.c
@@ -16,20 +16,19 @@
 int MPI_Bsend_init(void *buf, int count, MPI_Datatype type,
                    int dest, int tag, MPI_Comm comm, MPI_Request *request) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
-                                                                                                                       
+    
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -39,11 +38,10 @@ int MPI_Bsend_init(void *buf, int count, MPI_Datatype type,
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Bsend_init");
     }
-                                                                                                                       
-    return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_BUFFERED, comm, request);;
+
+    rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Bsend_init");
 }
 
diff --git a/src/mpi/c/ibsend.c b/src/mpi/c/ibsend.c
index b1756e2e76..befb51522b 100644
--- a/src/mpi/c/ibsend.c
+++ b/src/mpi/c/ibsend.c
@@ -17,20 +17,19 @@
 int MPI_Ibsend(void *buf, int count, MPI_Datatype type, int dest,
 		       int tag, MPI_Comm comm, MPI_Request *request) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -40,11 +39,10 @@ int MPI_Ibsend(void *buf, int count, MPI_Datatype type, int dest,
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ibsend");
     }
 
-    return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request);
+    rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_BUFFERED,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ibsend");
 }
 
diff --git a/src/mpi/c/irecv.c b/src/mpi/c/irecv.c
index 5aec85c8de..37cbc45f76 100644
--- a/src/mpi/c/irecv.c
+++ b/src/mpi/c/irecv.c
@@ -17,32 +17,32 @@
 int MPI_Irecv(void *buf, int count, MPI_Datatype type, int source,
               int tag, MPI_Comm comm, MPI_Request *request) 
 {
+    int rc;
     if (source == MPI_PROC_NULL) {
         return mca_pml.pml_null(request);
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
             rc = MPI_ERR_COMM;
-        } else if (source != MPI_ANY_SOURCE && source != MPI_PROC_NULL && lam_comm_peer_invalid(comm, source)) {
+        } else if (source != MPI_ANY_SOURCE && 
+                   source != MPI_PROC_NULL &&  
+                   lam_comm_peer_invalid(comm, source)) {
             rc = MPI_ERR_RANK;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Irecv");
     }
 
-    return mca_pml.pml_irecv(buf,count,type,source,tag,comm,request);
+    rc = mca_pml.pml_irecv(buf,count,type,source,tag,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Irecv");
 }
 
diff --git a/src/mpi/c/irsend.c b/src/mpi/c/irsend.c
index 0426d171bd..f518ed2fb5 100644
--- a/src/mpi/c/irsend.c
+++ b/src/mpi/c/irsend.c
@@ -17,20 +17,19 @@
 int MPI_Irsend(void *buf, int count, MPI_Datatype type, int dest,
                int tag, MPI_Comm comm, MPI_Request *request) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -40,11 +39,10 @@ int MPI_Irsend(void *buf, int count, MPI_Datatype type, int dest,
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Irsend");
     }
 
-    return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request);
+    rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Irsend");
 }
 
diff --git a/src/mpi/c/isend.c b/src/mpi/c/isend.c
index 858242e85b..614c07e9c7 100644
--- a/src/mpi/c/isend.c
+++ b/src/mpi/c/isend.c
@@ -17,20 +17,19 @@
 int MPI_Isend(void *buf, int count, MPI_Datatype type, int dest,
                int tag, MPI_Comm comm, MPI_Request *request) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -40,11 +39,10 @@ int MPI_Isend(void *buf, int count, MPI_Datatype type, int dest,
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Isend");
     }
 
-    return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request);
+    rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Isend");
 }
 
diff --git a/src/mpi/c/issend.c b/src/mpi/c/issend.c
index 4e20a8ecea..a60585d7a9 100644
--- a/src/mpi/c/issend.c
+++ b/src/mpi/c/issend.c
@@ -5,6 +5,7 @@
 #include "lam_config.h"
 
 #include "mpi.h"
+#include "errhandler/errhandler.h"
 #include "runtime/runtime.h"
 #include "mpi/c/bindings.h"
 #include "mca/pml/pml.h"
@@ -17,20 +18,19 @@
 int MPI_Issend(void *buf, int count, MPI_Datatype type, int dest,
                int tag, MPI_Comm comm, MPI_Request *request) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -40,11 +40,10 @@ int MPI_Issend(void *buf, int count, MPI_Datatype type, int dest,
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Issend");
     }
 
-    return mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request);
+    rc = mca_pml.pml_isend(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Issend");
 }
 
diff --git a/src/mpi/c/recv.c b/src/mpi/c/recv.c
index 143be7faaf..4e83c2a95c 100644
--- a/src/mpi/c/recv.c
+++ b/src/mpi/c/recv.c
@@ -17,6 +17,7 @@
 int MPI_Recv(void *buf, int count, MPI_Datatype type, int source,
              int tag, MPI_Comm comm, MPI_Status *status) 
 {
+    int rc;
     if (source == MPI_PROC_NULL) {
         if (status) {
             status->MPI_SOURCE = MPI_PROC_NULL;
@@ -28,15 +29,13 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source,
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -44,10 +43,10 @@ int MPI_Recv(void *buf, int count, MPI_Datatype type, int source,
         } else if (source != MPI_ANY_SOURCE && lam_comm_peer_invalid(comm, source)) {
             rc = MPI_ERR_RANK;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Recv");
     }
 
-    return mca_pml.pml_recv(buf, count, type, source, tag, comm, status);
+    rc = mca_pml.pml_recv(buf, count, type, source, tag, comm, status);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Recv");
 }
+
diff --git a/src/mpi/c/recv_init.c b/src/mpi/c/recv_init.c
index 40676d097b..b0384b04f7 100644
--- a/src/mpi/c/recv_init.c
+++ b/src/mpi/c/recv_init.c
@@ -17,31 +17,32 @@
 int MPI_Recv_init(void *buf, int count, MPI_Datatype type, int source,
                   int tag, MPI_Comm comm, MPI_Request *request) 
 {
+    int rc;
     if (source == MPI_PROC_NULL) {
         return mca_pml.pml_null(request);
     }
-                                                                                                                       
+
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
             rc = MPI_ERR_COMM;
-        } else if (source != MPI_ANY_SOURCE && source != MPI_PROC_NULL && lam_comm_peer_invalid(comm, source)) {
+        } else if (source != MPI_ANY_SOURCE && 
+                   source != MPI_PROC_NULL && 
+                   lam_comm_peer_invalid(comm, source)) {
             rc = MPI_ERR_RANK;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Recv_init");
     }
-    return mca_pml.pml_irecv_init(buf,count,type,source,tag,comm,request);
+
+    rc = mca_pml.pml_irecv_init(buf,count,type,source,tag,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Recv_init");
 }
 
diff --git a/src/mpi/c/rsend.c b/src/mpi/c/rsend.c
index fa549b0ab8..8915c67a16 100644
--- a/src/mpi/c/rsend.c
+++ b/src/mpi/c/rsend.c
@@ -16,20 +16,19 @@
 
 int MPI_Rsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -37,10 +36,10 @@ int MPI_Rsend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co
         } else if (lam_comm_peer_invalid(comm, dest)) {
             rc = MPI_ERR_RANK;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Rsend");
     }
 
-    return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm);
+    rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Rsend");
 }
+
diff --git a/src/mpi/c/rsend_init.c b/src/mpi/c/rsend_init.c
index 59fb86fc4c..db863b1c42 100644
--- a/src/mpi/c/rsend_init.c
+++ b/src/mpi/c/rsend_init.c
@@ -18,20 +18,19 @@ int MPI_Rsend_init(void *buf, int count, MPI_Datatype type,
                    int dest, int tag, MPI_Comm comm,
                    MPI_Request *request) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -41,11 +40,10 @@ int MPI_Rsend_init(void *buf, int count, MPI_Datatype type,
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Rsend_init");
     }
 
-    return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_READY, comm, request);;
+    rc =  mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_READY,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Rsend_init");
 }
 
diff --git a/src/mpi/c/send.c b/src/mpi/c/send.c
index 0aa887d4c6..448cd35386 100644
--- a/src/mpi/c/send.c
+++ b/src/mpi/c/send.c
@@ -17,20 +17,19 @@
 int MPI_Send(void *buf, int count, MPI_Datatype type, int dest,
              int tag, MPI_Comm comm) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
         int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -38,11 +37,10 @@ int MPI_Send(void *buf, int count, MPI_Datatype type, int dest,
         } else if (lam_comm_peer_invalid(comm, dest)) {
             rc = MPI_ERR_RANK;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Send");
     }
 
-    return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm);
+    rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Send");
 }
 
diff --git a/src/mpi/c/send_init.c b/src/mpi/c/send_init.c
index 62bb700cc9..09818d66c9 100644
--- a/src/mpi/c/send_init.c
+++ b/src/mpi/c/send_init.c
@@ -18,20 +18,19 @@ int MPI_Send_init(void *buf, int count, MPI_Datatype type,
                   int dest, int tag, MPI_Comm comm,
                   MPI_Request *request) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -41,11 +40,10 @@ int MPI_Send_init(void *buf, int count, MPI_Datatype type,
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Send_init");
     }
 
-    return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_STANDARD, comm, request);;
+    rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_STANDARD,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Send_init");
 }
 
diff --git a/src/mpi/c/sendrecv.c b/src/mpi/c/sendrecv.c
index 3b6b4d5160..798a24600e 100644
--- a/src/mpi/c/sendrecv.c
+++ b/src/mpi/c/sendrecv.c
@@ -5,7 +5,10 @@
 #include <stdio.h>
 
 #include "mpi.h"
+#include "runtime/runtime.h"
 #include "mpi/c/bindings.h"
+#include "mca/pml/pml.h"
+
 
 #if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES
 #pragma weak MPI_Sendrecv = PMPI_Sendrecv
@@ -14,6 +17,60 @@
 int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype recvtype,
                  int dest, int sendtag, void *recvbuf, int recvcount,
                  MPI_Datatype sendtype, int source, int recvtag,
-                 MPI_Comm comm,  MPI_Status *status) {
-    return MPI_SUCCESS;
+                 MPI_Comm comm,  MPI_Status *status) 
+{
+    lam_request_t* req;
+    int rc;
+
+    if ( MPI_PARAM_CHECK ) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
+            rc = MPI_ERR_INTERN;
+        } else if (lam_comm_invalid(comm)) {
+            rc = MPI_ERR_COMM;
+        } else if (sendcount < 0) {
+            rc = MPI_ERR_COUNT;
+        } else if (sendtype == MPI_DATATYPE_NULL) {
+            rc = MPI_ERR_TYPE;
+        } else if (lam_comm_peer_invalid(comm, dest)) {
+            rc = MPI_ERR_RANK;
+        } else if (sendtag < 0 || sendtag > MPI_TAG_UB_VALUE) {
+            rc = MPI_ERR_TAG;
+        } else if (recvcount < 0) {
+            rc = MPI_ERR_COUNT;
+        } else if (recvtype == MPI_DATATYPE_NULL) {
+            rc = MPI_ERR_TYPE;
+        } else if (source != MPI_PROC_NULL && source != MPI_ANY_SOURCE && lam_comm_peer_invalid(comm, source)) {
+            rc = MPI_ERR_RANK;
+        } else if (recvtag < MPI_ANY_TAG || recvtag > MPI_TAG_UB_VALUE) {
+            rc = MPI_ERR_TAG;
+        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv");
+    }
+
+    if (source != MPI_PROC_NULL) { /* post recv */
+        rc = mca_pml.pml_irecv(recvbuf, recvcount, recvtype,
+                           source, recvtag, comm, &req);
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv");
+    }
+
+    if (dest != MPI_PROC_NULL) { /* send */
+        rc = mca_pml.pml_send(sendbuf, sendcount, sendtype, dest,
+                           sendtag, MCA_PML_BASE_SEND_STANDARD, comm);
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Sendrecv");
+    }
+
+    if (source != MPI_PROC_NULL) { /* wait for recv */
+ 
+        rc = mca_pml.pml_wait(1, &req, NULL, status);
+
+    } else {
+
+        status->MPI_ERROR = MPI_SUCCESS;
+        status->MPI_SOURCE = MPI_PROC_NULL;
+        status->MPI_TAG = MPI_ANY_TAG;
+        status->_count = 0;
+        rc = MPI_SUCCESS;
+    }
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Sendrecv");
 }
diff --git a/src/mpi/c/sendrecv_replace.c b/src/mpi/c/sendrecv_replace.c
index e3268788ad..f9895c4d28 100644
--- a/src/mpi/c/sendrecv_replace.c
+++ b/src/mpi/c/sendrecv_replace.c
@@ -13,6 +13,7 @@
 
 int MPI_Sendrecv_replace(void * buf, int count, MPI_Datatype datatype,
                          int dest, int sendtag, int source, int recvtag,
-                         MPI_Comm comm, MPI_Status *status) {
+                         MPI_Comm comm, MPI_Status *status) 
+{
     return MPI_SUCCESS;
 }
diff --git a/src/mpi/c/ssend.c b/src/mpi/c/ssend.c
index d3cd42f575..29dbda4e06 100644
--- a/src/mpi/c/ssend.c
+++ b/src/mpi/c/ssend.c
@@ -16,20 +16,19 @@
 
 int MPI_Ssend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Comm comm) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -37,11 +36,10 @@ int MPI_Ssend(void *buf, int count, MPI_Datatype type, int dest, int tag, MPI_Co
         } else if (lam_comm_peer_invalid(comm, dest)) {
             rc = MPI_ERR_RANK;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ssend");
     }
 
-    return mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm);
+    rc = mca_pml.pml_send(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ssend");
 }
 
diff --git a/src/mpi/c/ssend_init.c b/src/mpi/c/ssend_init.c
index 08f2386a41..684a6bbc8a 100644
--- a/src/mpi/c/ssend_init.c
+++ b/src/mpi/c/ssend_init.c
@@ -18,20 +18,19 @@ int MPI_Ssend_init(void *buf, int count, MPI_Datatype type,
                    int dest, int tag, MPI_Comm comm,
                    MPI_Request *request) 
 {
+    int rc;
     if (dest == MPI_PROC_NULL) {
         return MPI_SUCCESS;
     }
 
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (count < 0) {
             rc = MPI_ERR_COUNT;
-#if 0
         } else if (type == MPI_DATATYPE_NULL) {
             rc = MPI_ERR_TYPE;
-#endif
         } else if (tag < 0 || tag > MPI_TAG_UB_VALUE) {
             rc = MPI_ERR_TAG;
         } else if (lam_comm_invalid(comm)) {
@@ -41,10 +40,10 @@ int MPI_Ssend_init(void *buf, int count, MPI_Datatype type,
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, comm, rc, "MPI_Ssend_init");
     }
 
-    return mca_pml.pml_isend_init(buf, count, type, dest, tag, MCA_PML_BASE_SEND_SYNCHRONOUS, comm, request);;
+    rc = mca_pml.pml_isend_init(buf,count,type,dest,tag,MCA_PML_BASE_SEND_SYNCHRONOUS,comm,request);
+    LAM_ERRHANDLER_RETURN(rc, comm, rc, "MPI_Ssend_init");
 }
+
diff --git a/src/mpi/c/start.c b/src/mpi/c/start.c
index 14ac3db83f..1b68185dce 100644
--- a/src/mpi/c/start.c
+++ b/src/mpi/c/start.c
@@ -27,6 +27,6 @@ int MPI_Start(MPI_Request *request)
             return rc;
         }
     }
-    return mca_pml.pml_start(request);
+    return mca_pml.pml_start(1, request);
 }
 
diff --git a/src/mpi/c/startall.c b/src/mpi/c/startall.c
index 4f06e2a06b..f3b646bff8 100644
--- a/src/mpi/c/startall.c
+++ b/src/mpi/c/startall.c
@@ -5,12 +5,29 @@
 #include <stdio.h>
 
 #include "mpi.h"
+#include "runtime/runtime.h"
 #include "mpi/c/bindings.h"
+#include "mca/pml/pml.h"
+
 
 #if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES
 #pragma weak MPI_Startall = PMPI_Startall
 #endif
 
-int MPI_Startall(int count, MPI_Request *array_of_requests) {
-    return MPI_SUCCESS;
+
+int MPI_Startall(int count, MPI_Request *requests) 
+{
+    if ( MPI_PARAM_CHECK ) {
+        int rc = MPI_SUCCESS;
+        if (lam_mpi_finalized) {
+            rc = MPI_ERR_INTERN;
+        } else if (requests == NULL) {
+            rc = MPI_ERR_REQUEST;
+        }
+        if (rc != MPI_SUCCESS) {
+            return rc;
+        }
+    }
+    return mca_pml.pml_start(count, requests);
 }
+
diff --git a/src/mpi/c/test.c b/src/mpi/c/test.c
index 47a881b35b..30ed0609bf 100644
--- a/src/mpi/c/test.c
+++ b/src/mpi/c/test.c
@@ -16,18 +16,17 @@
 
 int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status) 
 {
+    int rc, index;
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if ( lam_mpi_finalized ) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         } else if (completed == NULL) {
             rc = MPI_ERR_ARG;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Test");
     }
 
     if(*request == NULL) {
@@ -38,6 +37,9 @@ int MPI_Test(MPI_Request *request, int *completed, MPI_Status *status)
         status->_count = 0;
         return MPI_SUCCESS;
     }
-    return mca_pml.pml_test(request,completed,status);
+    rc = mca_pml.pml_test(1, request, index, completed, status);
+    if(completed < 0)
+        completed = 0;
+    LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Test");
 }
 
diff --git a/src/mpi/c/testall.c b/src/mpi/c/testall.c
index 536149432e..1c605d3a8e 100644
--- a/src/mpi/c/testall.c
+++ b/src/mpi/c/testall.c
@@ -12,7 +12,7 @@
 #endif
 
 int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
-                MPI_Status array_of_statuses[]) {
-
+                MPI_Status array_of_statuses[]) 
+{
     return MPI_SUCCESS;
 }
diff --git a/src/mpi/c/testany.c b/src/mpi/c/testany.c
index 1351491f55..43613bde66 100644
--- a/src/mpi/c/testany.c
+++ b/src/mpi/c/testany.c
@@ -5,13 +5,30 @@
 #include <stdio.h>
 
 #include "mpi.h"
+#include "runtime/runtime.h"
 #include "mpi/c/bindings.h"
+#include "mca/pml/pml.h"
 
 #if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES
 #pragma weak MPI_Testany = PMPI_Testany
 #endif
 
-int MPI_Testany(int count, MPI_Request array_of_requests[], int *index,
-                MPI_Status *status) {
-    return MPI_SUCCESS;
+int MPI_Testany(int count, MPI_Request requests[], int *index, int *completed, MPI_Status *status) 
+{
+    int rc;
+    if ( MPI_PARAM_CHECK ) {
+        int rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
+            rc = MPI_ERR_INTERN;
+        } else if (NULL == requests) {
+            rc = MPI_ERR_REQUEST;
+        } else if (NULL == index) {
+            rc = MPI_ERR_ARG;
+        }
+        LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testany");
+    }
+                                                                                                                        
+    rc = mca_pml.pml_test(count, requests, index, completed, status);
+    LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Testany");
 }
+
diff --git a/src/mpi/c/testsome.c b/src/mpi/c/testsome.c
index 90911368d9..b84cc232ae 100644
--- a/src/mpi/c/testsome.c
+++ b/src/mpi/c/testsome.c
@@ -5,14 +5,40 @@
 #include <stdio.h>
 
 #include "mpi.h"
+#include "runtime/runtime.h"
 #include "mpi/c/bindings.h"
+#include "mca/pml/pml.h"
 
 #if LAM_HAVE_WEAK_SYMBOLS && LAM_PROFILING_DEFINES
 #pragma weak MPI_Testsome = PMPI_Testsome
 #endif
 
-int MPI_Testsome(int incount, MPI_Request array_of_requests[],
-                 int *outcount, int array_of_indices,
-                 MPI_Status array_of_statuses) {
+int MPI_Testsome(int incount, MPI_Request requests[],
+                 int *outcount, int indices[],
+                 MPI_Status statuses[]) 
+{
+    int rc, index, completed;
+    if ( MPI_PARAM_CHECK ) {
+        int rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
+            rc = MPI_ERR_INTERN;
+        } else if (NULL == requests) {
+            rc = MPI_ERR_REQUEST;
+        } else if (NULL == indices) {
+            rc = MPI_ERR_ARG;
+        }
+        LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testsome");
+    }
+                                                                                                                        
+    /* optimize this in the future */
+    rc = mca_pml.pml_test(incount, requests, &index, &completed, statuses);
+    LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Testsome");
+    if(completed) {
+        *outcount = 1;
+        indices[0] = index;
+    } else {
+        *outcount = 0;
+    }
     return MPI_SUCCESS;
 }
+
diff --git a/src/mpi/c/topo_test.c b/src/mpi/c/topo_test.c
index 7f51ac47ed..623fc7430a 100644
--- a/src/mpi/c/topo_test.c
+++ b/src/mpi/c/topo_test.c
@@ -11,6 +11,7 @@
 #pragma weak MPI_Topo_test = PMPI_Topo_test
 #endif
 
-int MPI_Topo_test(MPI_Comm comm, int *status) {
+int MPI_Topo_test(MPI_Comm comm, int *status) 
+{
     return MPI_SUCCESS;
 }
diff --git a/src/mpi/c/wait.c b/src/mpi/c/wait.c
index bc1da906b5..4ded5841b0 100644
--- a/src/mpi/c/wait.c
+++ b/src/mpi/c/wait.c
@@ -15,26 +15,27 @@
 
 int MPI_Wait(MPI_Request *request, MPI_Status *status) 
 {
-    int index;
+    int index, rc;
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (request == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Wait");
     }
 
-    if (*request == NULL) {
-        status->MPI_SOURCE = MPI_PROC_NULL;
-        status->MPI_TAG = MPI_ANY_TAG;
-        status->MPI_ERROR = MPI_SUCCESS;
-        status->_count = 0;
+    if (NULL == *request) {
+        if (NULL != status) {
+            status->MPI_SOURCE = MPI_PROC_NULL;
+            status->MPI_TAG = MPI_ANY_TAG;
+            status->MPI_ERROR = MPI_SUCCESS;
+            status->_count = 0;
+        }
         return MPI_SUCCESS;
     }
-    return mca_pml.pml_wait(1, request, &index, status);
+    rc = mca_pml.pml_wait(1, request, &index, status);
+    LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Wait");
 }
 
diff --git a/src/mpi/c/waitall.c b/src/mpi/c/waitall.c
index d14c7da31f..0e397109f6 100644
--- a/src/mpi/c/waitall.c
+++ b/src/mpi/c/waitall.c
@@ -16,17 +16,17 @@
 
 int MPI_Waitall(int count, MPI_Request *requests, MPI_Status *statuses) 
 {
+    int rc;
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (requests == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitall");
     }
-    return mca_pml.pml_wait_all(count, requests, statuses);
+    rc = mca_pml.pml_wait_all(count, requests, statuses);
+    LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitall");
 }
 
diff --git a/src/mpi/c/waitany.c b/src/mpi/c/waitany.c
index f01473ea19..89d331dc5c 100644
--- a/src/mpi/c/waitany.c
+++ b/src/mpi/c/waitany.c
@@ -15,17 +15,17 @@
 
 int MPI_Waitany(int count, MPI_Request *requests, int *index, MPI_Status *status) 
 {
+    int rc;
     if ( MPI_PARAM_CHECK ) {
-        int rc = MPI_SUCCESS;
-        if (lam_mpi_finalized) {
+        rc = MPI_SUCCESS;
+        if ( LAM_MPI_INVALID_STATE ) {
             rc = MPI_ERR_INTERN;
         } else if (requests == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitany");
     }
-    return mca_pml.pml_wait(count, requests, index, status);
+    rc = mca_pml.pml_wait(count, requests, index, status);
+    LAM_ERRHANDLER_RETURN(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitany");
 }
 
diff --git a/src/mpi/c/waitsome.c b/src/mpi/c/waitsome.c
index 4f745765ed..81cebdfbdc 100644
--- a/src/mpi/c/waitsome.c
+++ b/src/mpi/c/waitsome.c
@@ -29,14 +29,12 @@ int MPI_Waitsome(int incount, MPI_Request *requests,
         } else if (requests == NULL) {
             rc = MPI_ERR_REQUEST;
         }
-        if (rc != MPI_SUCCESS) {
-            return rc;
-        }
+        LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitsome");
     }
 
     /* optimize this in the future */
-    if((rc = mca_pml.pml_wait(incount, requests, &index, statuses)) != LAM_SUCCESS)
-        return rc;
+    rc = mca_pml.pml_wait(incount, requests, &index, statuses);
+    LAM_ERRHANDLER_CHECK(rc, (lam_communicator_t*)NULL, rc, "MPI_Waitsome");
     *outcount = 1;
     indices[0] = index;
     return MPI_SUCCESS;
diff --git a/src/proc/proc.c b/src/proc/proc.c
index 243773eab6..8d65da25c6 100644
--- a/src/proc/proc.c
+++ b/src/proc/proc.c
@@ -32,6 +32,10 @@ void lam_proc_construct(lam_proc_t* proc)
     proc->proc_vpid = 0;
     proc->proc_pml = NULL;
     proc->proc_modex = NULL;
+    proc->proc_arch = 0;
+
+    /* FIX - need to determine remote process architecture */
+    proc->proc_convertor = lam_convertor_create(0, 0);
 
     THREAD_LOCK(&lam_proc_lock);
     lam_list_append(&lam_proc_list, (lam_list_item_t*)proc);
diff --git a/src/proc/proc.h b/src/proc/proc.h
index ccf28c9833..cc8ca89bf9 100644
--- a/src/proc/proc.h
+++ b/src/proc/proc.h
@@ -7,6 +7,7 @@
 
 #include "include/types.h"
 #include "lfc/lam_list.h"
+#include "datatype/datatype.h"
 
 
 extern lam_class_t lam_proc_t_class;
@@ -18,6 +19,8 @@ struct lam_proc_t {
     uint32_t                  proc_vpid;   /* process identifier w/in the job */
     struct mca_pml_proc_t*    proc_pml;    /* PML specific proc data */
     struct mca_base_modex_t*  proc_modex;  /* MCA module exchange data */
+    int                       proc_arch;
+    lam_convertor_t*          proc_convertor;
 
   /* JMS: need to have the following information:
 
diff --git a/src/runtime/lam_mpi_init.c b/src/runtime/lam_mpi_init.c
index ecae0c65d6..6aac602e91 100644
--- a/src/runtime/lam_mpi_init.c
+++ b/src/runtime/lam_mpi_init.c
@@ -142,5 +142,6 @@ int lam_mpi_init(int argc, char **argv, int requested, int *provided)
     /* All done */
 
     lam_mpi_initialized = true;
+    lam_mpi_finalized = false;
     return MPI_SUCCESS;
 }
diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h
index 2a4e237645..64fba01eb0 100644
--- a/src/runtime/runtime.h
+++ b/src/runtime/runtime.h
@@ -12,8 +12,11 @@
  * Global variables and symbols for the MPI layer
  */
 
+#define LAM_MPI_INVALID_STATE (!lam_mpi_initialized || lam_mpi_finalized)
+
 extern bool lam_mpi_initialized;
 extern bool lam_mpi_finalized;
+extern bool lam_mpi_invalid_state;
 
 extern bool lam_mpi_thread_multiple;
 extern int lam_mpi_thread_requested;