Merge pull request #5933 from hppritcha/topic/remove_bfo_pml
remove the bfo pml
Этот коммит содержится в:
Коммит
a435bfe1cf
@ -1,78 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = -DPML_BFO=1
|
||||
|
||||
dist_ompidata_DATA = \
|
||||
help-mpi-pml-bfo.txt
|
||||
|
||||
EXTRA_DIST = post_configure.sh
|
||||
|
||||
bfo_sources = \
|
||||
pml_bfo.c \
|
||||
pml_bfo.h \
|
||||
pml_bfo_comm.c \
|
||||
pml_bfo_comm.h \
|
||||
pml_bfo_component.c \
|
||||
pml_bfo_component.h \
|
||||
pml_bfo_failover.c \
|
||||
pml_bfo_failover.h \
|
||||
pml_bfo_hdr.h \
|
||||
pml_bfo_iprobe.c \
|
||||
pml_bfo_irecv.c \
|
||||
pml_bfo_isend.c \
|
||||
pml_bfo_progress.c \
|
||||
pml_bfo_rdma.c \
|
||||
pml_bfo_rdma.h \
|
||||
pml_bfo_rdmafrag.c \
|
||||
pml_bfo_rdmafrag.h \
|
||||
pml_bfo_recvfrag.c \
|
||||
pml_bfo_recvfrag.h \
|
||||
pml_bfo_recvreq.c \
|
||||
pml_bfo_recvreq.h \
|
||||
pml_bfo_sendreq.c \
|
||||
pml_bfo_sendreq.h \
|
||||
pml_bfo_start.c
|
||||
|
||||
# If we have CUDA support requested, build the CUDA file also
|
||||
if OPAL_cuda_support
|
||||
bfo_sources += \
|
||||
pml_bfo_cuda.c
|
||||
endif
|
||||
|
||||
if MCA_BUILD_ompi_pml_bfo_DSO
|
||||
component_noinst =
|
||||
component_install = mca_pml_bfo.la
|
||||
else
|
||||
component_noinst = libmca_pml_bfo.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ompilibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_pml_bfo_la_SOURCES = $(bfo_sources)
|
||||
mca_pml_bfo_la_LDFLAGS = -module -avoid-version
|
||||
mca_pml_bfo_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_pml_bfo_la_SOURCES = $(bfo_sources)
|
||||
libmca_pml_bfo_la_LDFLAGS = -module -avoid-version
|
@ -1,340 +0,0 @@
|
||||
Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
|
||||
BFO DESIGN DOCUMENT
|
||||
This document describes the use and design of the bfo. In addition,
|
||||
there is a section at the end explaining why this functionality was
|
||||
not merged into the ob1 PML.
|
||||
|
||||
1. GENERAL USAGE
|
||||
First, one has to configure the failover code into the openib BTL so
|
||||
that bfo will work correctly. To do this:
|
||||
configure --enable-btl-openib-failover.
|
||||
|
||||
Then, when running one needs to select the bfo PML explicitly.
|
||||
mpirun --mca pml bfo
|
||||
|
||||
Note that one needs to both configure with --enable-btl-openib-failover
|
||||
and run with --mca pml bfo to get the failover support. If one of
|
||||
these two steps is skipped, then the MPI job will just abort in the
|
||||
case of an error like it normally does with the ob1 PML.
|
||||
|
||||
2. GENERAL FUNCTION
|
||||
The bfo failover feature requires two or more openib BTLs in use. In
|
||||
normal operation, it will stripe the communication over the multiple
|
||||
BTLs. When an error is detected, it will stop using the BTL that
|
||||
incurred the error and continue the communication over the remaining
|
||||
BTL. Once a BTL has been mapped out, it cannot be used by the job
|
||||
again, even if the underlying fabric becomes functional again. Only
|
||||
new jobs started after the fabric comes back up will use both BTLs.
|
||||
|
||||
The bfo works in conjunction with changes that were made in the openib
|
||||
BTL. As noted above, those changes need to be configured into the
|
||||
BTL for everything to work properly.
|
||||
|
||||
The bfo only fails over between openib BTLs. It cannot failover from
|
||||
an openib BTL to TCP, for example.
|
||||
|
||||
3. GENERAL DESIGN
|
||||
The bfo (Btl FailOver) PML was designed to work in clusters that have
|
||||
multiple openib BTLs. It was designed to be lightweight so as to
|
||||
avoid any adverse effects on latency. To that end, there is no
|
||||
tracking of fragments or messages in the bfo PML. Rather, it depends
|
||||
on the underlying BTL to notify it of each fragment that has an error.
|
||||
The bfo then decides what needs to be done based on the type of
|
||||
fragment that gets an error.
|
||||
|
||||
No additional sequence numbers were introduced in the bfo. Instead,
|
||||
it makes use of the sequence numbers that exist in the MATCH, RNDV and
|
||||
RGET fragment header. In that way, duplicate fragments that have
|
||||
MATCH information in them can be detected. Other fragments, like PUT
|
||||
and ACK, are never retransmitted so it does not matter that they do
|
||||
not have sequence numbers. The FIN header was a special case in that
|
||||
it was changed to include the MATCH header so that the tag, source,
|
||||
and context fields could be used to check for duplicate FINs.
|
||||
|
||||
Note that the assumption is that the underlying BTL will always issue
|
||||
a callback with an error flag when it thinks a fragment has an error.
|
||||
This means that even after an error is detected on a BTL, the BTL
|
||||
continues to be checked for any other messages that may also complete
|
||||
with an error. This is potentially a unique characteristic of the
|
||||
openib BTL when running over RC connections that allows the BFO to
|
||||
work properly.
|
||||
|
||||
One scenario that is particularly difficult to handle is the case
|
||||
where a fragment has an error but the message actually makes it to the
|
||||
other side. It is because of this that all fragments need to be
|
||||
checked to make sure they are not a duplicate. This scenario also
|
||||
complicates some of the rendezvous protocols as the two sides may not
|
||||
agree where the problem occurred. For example, one can imagine a
|
||||
sender getting an error on a final FIN message, but the FIN message
|
||||
actually arrives at the other side. The receiver thinks the
|
||||
communication is done and moves on. The sender thinks there was a
|
||||
problem, and that the communication needs to restart.
|
||||
|
||||
It is also important to note that a message cannot signal a successful
|
||||
completion and *not* make it to the receiver. This would probably cause
|
||||
the bfo to hang.
|
||||
|
||||
4. ERRORS
|
||||
Errors are detected in the openib BTL layer and propagated to the PML
|
||||
layer. Typically, the errors occur while polling the completion
|
||||
queue, but can happen in other areas as well. When an error occurs,
|
||||
an additional callback is called so the PML can map out the connection
|
||||
for future sending. Then the callback associated with the fragment is
|
||||
called, but with the error field set to OMPI_ERROR. This way, the PML
|
||||
knows that this fragment may not have made it to the remote side.
|
||||
|
||||
The first callback into the PML is via the mca_pml_bfo_error_handler()
|
||||
callback and the PML uses this to remove a connection for future
|
||||
sending. If the error_proc_t field is NULL, then the entire BTL is
|
||||
removed for any future communication. If the error_proc_t is not
|
||||
NULL, then the BTL is only removed for the connection associated with
|
||||
the error_proc_t.
|
||||
|
||||
The second callback is the standard one for a completion event, and
|
||||
this can trigger various activities in the PML. The regular callback
|
||||
function is called but the status is set to OMPI_ERROR. The PML layer
|
||||
detects this and calls some failover specific routines depending on
|
||||
the type of fragment that got the error.
|
||||
|
||||
|
||||
5. RECOVERY OF MATCH FRAGMENTS
|
||||
Note: For a general description of how the various fragments interact,
|
||||
see Appendix 1 at the end of this document.
|
||||
|
||||
In the case of a MATCH fragment, the fragment is simply resent. Care
|
||||
has to be taken with a MATCH fragment that is sent via the standard
|
||||
interface and one that is sent via the sendi interface. In the
|
||||
standard send, the send request is still available and is therefore
|
||||
reset reused to send the MATCH fragment. In the case of the sendi
|
||||
fragment, the send request is gone, so the fragment is regenerated
|
||||
from the information contained within the fragment.
|
||||
|
||||
6. RECOVERY OF RNDV or LARGE MESSAGE RDMA
|
||||
In the case of a large message RDMA transfer or a RNDV transfer where
|
||||
the message consists of several fragments, the restart is a little
|
||||
more complicated. This includes fragments like RNDV, PUT, RGET, FRAG,
|
||||
FIN, and RDMA write and RDMA read completions. In most cases, the
|
||||
requests associated with these fragments are reset and restarted.
|
||||
|
||||
First, it should be pointed out that a new variable was added to the
|
||||
send and receive requests. This variable tracks outstanding send
|
||||
events that have not yet received their completion events. This new
|
||||
variable is used so that a request is not restarted until all the
|
||||
outstanding events have completed. If one does not wait for the
|
||||
outstanding events to complete, then one may restart a request and
|
||||
then a completion event will happen on the wrong request.
|
||||
|
||||
There is a second variable added to each request and that is one that
|
||||
shows whether the request is already in an error state. When a request
|
||||
reaches the state that it has an error flagged on it and the outstanding
|
||||
completion events are down to zero, it can start the restart dance
|
||||
as described below.
|
||||
|
||||
7. SPECIAL CASE FOR FIN FRAGMENT
|
||||
Like the MATCH fragment, the FIN message is also simply resent. Like
|
||||
the sendi MATCH fragment, there may be no request associated with the
|
||||
FIN message when it gets an error, so the fragment is recreated from
|
||||
the information in the fragment. The FIN fragment was modified to
|
||||
have additional information like what is in a MATCH fragment including
|
||||
the context, source, and tag. In this way, we can figure out if the
|
||||
FIN message is a duplicate on the receiving side.
|
||||
|
||||
8. RESTART DANCE
|
||||
When the bfo determines that there are no outstanding completion events,
|
||||
a restart dance is initiated. There are four new PML message types that
|
||||
have been created to participate in the dance.
|
||||
1. RNDVRESTARTNOTIFY
|
||||
2. RECVERRNOTIFY
|
||||
3. RNDVRESTARTACK
|
||||
4. RNDVRESTARTNACK
|
||||
|
||||
When the send request is in an error state and the outstanding
|
||||
completion events is zero, RNDVRESTARTNOTIFY is sent from the sender
|
||||
to the receiver to let it know that the communication needs to be
|
||||
restarted. Upon receipt of the RNDVRESTARTNOTIFY, the receiver first
|
||||
checks to make sure that it is still pointing to a valid receiver
|
||||
request. If so, it marks the receive request in error. It then
|
||||
checks to see if there are any outstanding completion events on the
|
||||
receiver. If there are no outstanding completion events, the receiver
|
||||
sends the RNDVRESTARTACK. If there are outstanding completion events,
|
||||
then the RNDVRESTARTACK gets sent later when a completion event occurs
|
||||
that brings the outstanding event count to zero.
|
||||
|
||||
In the case that the receiver determines that it is no longer looking
|
||||
at a valid receive request, which means the request is complete, the
|
||||
receiver responds with a RNDVRESTARTNACK. While rare, this case can
|
||||
happen for example, when a final FRAG message triggers an error on the
|
||||
sender, but actually makes it to the receiver.
|
||||
|
||||
The RECVERRNOTIFY fragment is used so the receiver can let the sender
|
||||
sender know that it had an error. The sender then waits for all of
|
||||
its completion events, and then sends a RNDVRESTARTNOTIFY.
|
||||
|
||||
All the handling of these new messages is contained in the
|
||||
pml_bfo_failover files.
|
||||
|
||||
9. BTL SUPPORT
|
||||
The openib BTL also supplies a lot of support for the bfo PML. First,
|
||||
fragments can be stored in the BTL during normal operation if
|
||||
resources become scarce. This means that when an error is detected in
|
||||
the BTL, it needs to scour its internal queues for fragments that are
|
||||
destined for the BTL and error them out. The function
|
||||
error_out_all_pending_frags() takes care of this functionality. And
|
||||
some of the fragments stored can be coalesced, so care has to be taken
|
||||
to tease out each message from a coalesced fragment.
|
||||
|
||||
There is also some special code in the BTL to handle some strange
|
||||
occurrences that were observed in the BTL. First, there are times
|
||||
where only one half of the connection gets an error. This can result
|
||||
in a mismatch between what the PML thinks is available to it and can
|
||||
cause hangs. Therefore, when a BTL detects an error, it sends a
|
||||
special message down the working BTL connection to tell the remote
|
||||
side that it needs to be brought down as well.
|
||||
|
||||
Secondly, it has been observed that a message can get stuck in the
|
||||
eager RDMA connection between two BTLs. In this case, an error is
|
||||
detected on one side, but the other side never sees the message.
|
||||
Therefore, a special message is sent to the other side telling it to
|
||||
move along in the eager RDMA connection. This is all somewhat
|
||||
confusing. See the code in the btl_openib_failover.c file for the
|
||||
details.
|
||||
|
||||
10. MERGING
|
||||
Every effort was made to try and merge the bfo PML into the ob1 PML.
|
||||
The idea was that any upgrades to the ob1 PML would automatically make
|
||||
it into the bfo PML and this would enhance maintainability of all the
|
||||
code. However, it was deemed that this merging would cause more
|
||||
problems than it would solve. What was attempted and why the
|
||||
conclusion was made are documented here.
|
||||
|
||||
One can look at the bfo and easily see the differences between it and
|
||||
ob1. All the bfo specific code is surrounded by #if PML_BFO. In
|
||||
addition, there are two additional files in the bfo,
|
||||
pml_bfo_failover.c and pml_bfo_failover.h.
|
||||
|
||||
To merge them, the following was attempted. First, add all the code
|
||||
in #if regions into the ob1 PML. As of this writing, there are 73
|
||||
#ifs that would have to be added into ob1.
|
||||
|
||||
Secondly, remove almost all the pml_bfo files and replace them with
|
||||
links to the ob1 files.
|
||||
|
||||
Third, create a new header file that did name shifting of all the
|
||||
functions so that ob1 and bfo could live together. This also included
|
||||
having to create macros for the names of header files as well. To
|
||||
help illustrate the name shifting issue, here is what the file might
|
||||
look like in the bfo directory.
|
||||
|
||||
/* Need macros for the header files as they are different in the
|
||||
* different PMLs */
|
||||
#define PML "bfo"
|
||||
#define PML_OB1_H "pml_bfo.h"
|
||||
#define PML_OB1_COMM_H "pml_bfo_comm.h"
|
||||
#define PML_OB1_COMPONENT_H "pml_bfo_component.h"
|
||||
#define PML_OB1_HDR_H "pml_bfo_hdr.h"
|
||||
#define PML_OB1_RDMA_H "pml_bfo_rdma.h"
|
||||
#define PML_OB1_RDMAFRAG_H "pml_bfo_rdmafrag.h"
|
||||
#define PML_OB1_RECVFRAG_H "pml_bfo_recvfrag.h"
|
||||
#define PML_OB1_RECVREQ_H "pml_bfo_recvreq.h"
|
||||
#define PML_OB1_SENDREQ_H "pml_bfo_sendreq.h"
|
||||
|
||||
/* Name shifting of functions from ob1 to bfo (incomplete list) */
|
||||
#define mca_pml_ob1 mca_pml_bfo
|
||||
#define mca_pml_ob1_t mca_pml_bfo_t
|
||||
#define mca_pml_ob1_component mca_pml_bfo_component
|
||||
#define mca_pml_ob1_add_procs mca_pml_bfo_add_procs
|
||||
#define mca_pml_ob1_del_procs mca_pml_bfo_del_procs
|
||||
#define mca_pml_ob1_enable mca_pml_bfo_enable
|
||||
#define mca_pml_ob1_progress mca_pml_bfo_progress
|
||||
#define mca_pml_ob1_add_comm mca_pml_bfo_add_comm
|
||||
#define mca_pml_ob1_del_comm mca_pml_bfo_del_comm
|
||||
#define mca_pml_ob1_irecv_init mca_pml_bfo_irecv_init
|
||||
#define mca_pml_ob1_irecv mca_pml_bfo_irecv
|
||||
#define mca_pml_ob1_recv mca_pml_bfo_recv
|
||||
#define mca_pml_ob1_isend_init mca_pml_bfo_isend_init
|
||||
#define mca_pml_ob1_isend mca_pml_bfo_isend
|
||||
#define mca_pml_ob1_send mca_pml_bfo_send
|
||||
#define mca_pml_ob1_iprobe mca_pml_bfo_iprobe
|
||||
[...and much more ...]
|
||||
|
||||
The pml_bfo_hdr.h file was not a link because the changes in it were
|
||||
so extensive. Also the Makefile was kept separate so it could include
|
||||
the additional failover files as well as add a compile directive that
|
||||
would force the files to be compiled as bfo instead of ob1.
|
||||
|
||||
After these changes were made, several independent developers reviewed
|
||||
the results and concluded that making these changes would have too
|
||||
much of a negative impact on ob1 maintenance. First, the code became
|
||||
much harder to read with all the additional #ifdefs. Secondly, the
|
||||
possibility of adding other features, like csum, to ob1 would only
|
||||
make this issue even worse. Therefore, it was decided to keep the bfo
|
||||
PML separate from ob1.
|
||||
|
||||
11. UTILITIES
|
||||
In an ideal world, any bug fixes that are made in the ob1 PML would
|
||||
also be made in the csum and the bfo PMLs. However, that does not
|
||||
always happen. Therefore, there are two new utilities added to the
|
||||
contrib directory.
|
||||
|
||||
check-ob1-revision.pl
|
||||
check-ob1-pml-diffs.pl
|
||||
|
||||
The first one can be run to see if ob1 has changed from its last known
|
||||
state. Here is an example.
|
||||
|
||||
machine =>check-ob1-revision.pl
|
||||
Running svn diff -r24138 ../ompi/mca/pml/ob1
|
||||
No new changes detected in ob1. Everything is fine.
|
||||
|
||||
If there are differences, then one needs to review them and potentially
|
||||
add them to the bfo (and csum also if one feels like it).
|
||||
After that, bump up the value in the script to the latest value.
|
||||
|
||||
The second script allows one to see the differences between the ob1
|
||||
and bfo PML. Here is an example.
|
||||
|
||||
machine =>check-ob1-pml-diffs.pl
|
||||
|
||||
Starting script to check differences between bfo and ob1...
|
||||
Files Compared: pml_ob1.c and pml_bfo.c
|
||||
No differences encountered
|
||||
Files Compared: pml_ob1.h and pml_bfo.h
|
||||
[...snip...]
|
||||
Files Compared: pml_ob1_start.c and pml_bfo_start.c
|
||||
No differences encountered
|
||||
|
||||
There is a lot more in the script that tells how it is used.
|
||||
|
||||
|
||||
Appendix 1: SIMPLE OVERVIEW OF COMMUNICATION PROTOCOLS
|
||||
The drawings below attempt to describe some of the general flow of
|
||||
fragments in the various protocols that are supported in the PMLs.
|
||||
The "read" and "write" are actual RDMA actions and do not pertain to
|
||||
fragments that are sent. As can be inferred, they use FIN messages to
|
||||
indicate their completion.
|
||||
|
||||
|
||||
MATCH PROTOCOL
|
||||
sender >->->-> MATCH >->->-> receiver
|
||||
|
||||
SEND WITH MULTIPLE FRAGMENTS
|
||||
sender >->->-> RNDV >->->-> receiver
|
||||
<-<-<-< ACK <-<-<-<
|
||||
>->->-> FRAG >->->->
|
||||
>->->-> FRAG >->->->
|
||||
>->->-> FRAG >->->->
|
||||
|
||||
RDMA PUT
|
||||
sender >->->-> RNDV >->->-> receiver
|
||||
<-<-<-< PUT <-<-<-<
|
||||
<-<-<-< PUT <-<-<-<
|
||||
>->->-> write >->->->
|
||||
>->->-> FIN >->->->
|
||||
>->->-> write >->->->
|
||||
>->->-> FIN >->->->
|
||||
|
||||
RMA GET
|
||||
sender >->->-> RGET >->->-> receiver
|
||||
<-<-<-< read <-<-<-<
|
||||
<-<-<-< FIN <-<-<-<
|
@ -1,27 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ompi_pml_bfo_POST_CONFIG(will_build)
|
||||
# ----------------------------------------
|
||||
# The BFO PML requires a BML endpoint tag to compile, so require it.
|
||||
# Require in POST_CONFIG instead of CONFIG so that we only require it
|
||||
# if we're not disabled.
|
||||
AC_DEFUN([MCA_ompi_pml_bfo_POST_CONFIG], [
|
||||
AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])])
|
||||
])dnl
|
||||
|
||||
# MCA_ompi_pml_bfo_CONFIG(action-if-can-compile,
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
# We can always build, unless we were explicitly disabled.
|
||||
AC_DEFUN([MCA_ompi_pml_bfo_CONFIG],[
|
||||
AC_CONFIG_FILES([ompi/mca/pml/bfo/Makefile])
|
||||
[$1]
|
||||
])dnl
|
@ -1,20 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
[eager_limit_too_small]
|
||||
The "eager limit" MCA parameter in the %s BTL was set to a value which
|
||||
is too low for Open MPI to function properly. Please re-run your job
|
||||
with a higher eager limit value for this BTL; the exact MCA parameter
|
||||
name and its corresponding minimum value is shown below.
|
||||
|
||||
Local host: %s
|
||||
BTL name: %s
|
||||
BTL eager limit value: %d (set via btl_%s_eager_limit)
|
||||
BTL eager limit minimum: %d
|
||||
MCA parameter name: btl_%s_eager_limit
|
@ -1,7 +0,0 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: NVIDIA
|
||||
status: unmaintained
|
@ -1,897 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/mca/btl/btl.h"
|
||||
#include "opal/mca/btl/base/base.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/pml/base/base.h"
|
||||
#include "ompi/mca/pml/base/base.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/runtime/ompi_cr.h"
|
||||
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_component.h"
|
||||
#include "pml_bfo_comm.h"
|
||||
#include "pml_bfo_hdr.h"
|
||||
#include "pml_bfo_recvfrag.h"
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "pml_bfo_rdmafrag.h"
|
||||
#if PML_BFO
|
||||
#include "pml_bfo_failover.h"
|
||||
#endif /* PML_BFO */
|
||||
|
||||
mca_pml_bfo_t mca_pml_bfo = {
|
||||
{
|
||||
mca_pml_bfo_add_procs,
|
||||
mca_pml_bfo_del_procs,
|
||||
mca_pml_bfo_enable,
|
||||
mca_pml_bfo_progress,
|
||||
mca_pml_bfo_add_comm,
|
||||
mca_pml_bfo_del_comm,
|
||||
mca_pml_bfo_irecv_init,
|
||||
mca_pml_bfo_irecv,
|
||||
mca_pml_bfo_recv,
|
||||
mca_pml_bfo_isend_init,
|
||||
mca_pml_bfo_isend,
|
||||
mca_pml_bfo_send,
|
||||
mca_pml_bfo_iprobe,
|
||||
mca_pml_bfo_probe,
|
||||
mca_pml_bfo_start,
|
||||
mca_pml_bfo_improbe,
|
||||
mca_pml_bfo_mprobe,
|
||||
mca_pml_bfo_imrecv,
|
||||
mca_pml_bfo_mrecv,
|
||||
mca_pml_bfo_dump,
|
||||
mca_pml_bfo_ft_event,
|
||||
65535,
|
||||
INT_MAX
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void mca_pml_bfo_error_handler( struct mca_btl_base_module_t* btl,
|
||||
int32_t flags, ompi_proc_t* errproc,
|
||||
char* btlinfo );
|
||||
|
||||
int mca_pml_bfo_enable(bool enable)
|
||||
{
|
||||
if( false == enable ) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.lock, opal_mutex_t);
|
||||
|
||||
/* fragments */
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.rdma_frags, opal_free_list_t);
|
||||
opal_free_list_init( &mca_pml_bfo.rdma_frags,
|
||||
sizeof(mca_pml_bfo_rdma_frag_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_bfo_rdma_frag_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_bfo.free_list_num,
|
||||
mca_pml_bfo.free_list_max,
|
||||
mca_pml_bfo.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL );
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.recv_frags, opal_free_list_t);
|
||||
opal_free_list_init( &mca_pml_bfo.recv_frags,
|
||||
sizeof(mca_pml_bfo_recv_frag_t) + mca_pml_bfo.unexpected_limit,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_bfo_recv_frag_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_bfo.free_list_num,
|
||||
mca_pml_bfo.free_list_max,
|
||||
mca_pml_bfo.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL );
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.pending_pckts, opal_free_list_t);
|
||||
opal_free_list_init( &mca_pml_bfo.pending_pckts,
|
||||
sizeof(mca_pml_bfo_pckt_pending_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_bfo_pckt_pending_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_bfo.free_list_num,
|
||||
mca_pml_bfo.free_list_max,
|
||||
mca_pml_bfo.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL );
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.buffers, opal_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.send_ranges, opal_free_list_t);
|
||||
opal_free_list_init( &mca_pml_bfo.send_ranges,
|
||||
sizeof(mca_pml_bfo_send_range_t) +
|
||||
(mca_pml_bfo.max_send_per_range - 1) * sizeof(mca_pml_bfo_com_btl_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_bfo_send_range_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_bfo.free_list_num,
|
||||
mca_pml_bfo.free_list_max,
|
||||
mca_pml_bfo.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL );
|
||||
|
||||
/* pending operations */
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.send_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.recv_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.pckt_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.rdma_pending, opal_list_t);
|
||||
/* missing communicator pending list */
|
||||
OBJ_CONSTRUCT(&mca_pml_bfo.non_existing_communicator_pending, opal_list_t);
|
||||
|
||||
/**
|
||||
* If we get here this is the PML who get selected for the run. We
|
||||
* should get ownership for the send and receive requests list, and
|
||||
* initialize them with the size of our own requests.
|
||||
*/
|
||||
opal_free_list_init( &mca_pml_base_send_requests,
|
||||
sizeof(mca_pml_bfo_send_request_t) +
|
||||
(mca_pml_bfo.max_rdma_per_request - 1) *
|
||||
sizeof(mca_pml_bfo_com_btl_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_bfo_send_request_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_bfo.free_list_num,
|
||||
mca_pml_bfo.free_list_max,
|
||||
mca_pml_bfo.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL );
|
||||
|
||||
opal_free_list_init( &mca_pml_base_recv_requests,
|
||||
sizeof(mca_pml_bfo_recv_request_t) +
|
||||
(mca_pml_bfo.max_rdma_per_request - 1) *
|
||||
sizeof(mca_pml_bfo_com_btl_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_bfo_recv_request_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_bfo.free_list_num,
|
||||
mca_pml_bfo.free_list_max,
|
||||
mca_pml_bfo.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL );
|
||||
|
||||
mca_pml_bfo.enabled = true;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_pml_bfo_add_comm(ompi_communicator_t* comm)
|
||||
{
|
||||
/* allocate pml specific comm data */
|
||||
mca_pml_bfo_comm_t* pml_comm = OBJ_NEW(mca_pml_bfo_comm_t);
|
||||
opal_list_item_t *item, *next_item;
|
||||
mca_pml_bfo_recv_frag_t* frag;
|
||||
mca_pml_bfo_comm_proc_t* pml_proc;
|
||||
mca_pml_bfo_match_hdr_t* hdr;
|
||||
int i;
|
||||
|
||||
if (NULL == pml_comm) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* should never happen, but it was, so check */
|
||||
if (comm->c_contextid > mca_pml_bfo.super.pml_max_contextid) {
|
||||
OBJ_RELEASE(pml_comm);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
mca_pml_bfo_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
|
||||
comm->c_pml_comm = pml_comm;
|
||||
|
||||
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
|
||||
pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i);
|
||||
OBJ_RETAIN(pml_comm->procs[i].ompi_proc);
|
||||
}
|
||||
/* Grab all related messages from the non_existing_communicator pending queue */
|
||||
for( item = opal_list_get_first(&mca_pml_bfo.non_existing_communicator_pending);
|
||||
item != opal_list_get_end(&mca_pml_bfo.non_existing_communicator_pending);
|
||||
item = next_item ) {
|
||||
frag = (mca_pml_bfo_recv_frag_t*)item;
|
||||
next_item = opal_list_get_next(item);
|
||||
hdr = &frag->hdr.hdr_match;
|
||||
|
||||
/* Is this fragment for the current communicator ? */
|
||||
if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid )
|
||||
continue;
|
||||
|
||||
/* As we now know we work on a fragment for this communicator
|
||||
* we should remove it from the
|
||||
* non_existing_communicator_pending list. */
|
||||
opal_list_remove_item( &mca_pml_bfo.non_existing_communicator_pending,
|
||||
item );
|
||||
|
||||
add_fragment_to_unexpected:
|
||||
|
||||
/* We generate the MSG_ARRIVED event as soon as the PML is aware
|
||||
* of a matching fragment arrival. Independing if it is received
|
||||
* on the correct order or not. This will allow the tools to
|
||||
* figure out if the messages are not received in the correct
|
||||
* order (if multiple network interfaces).
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* There is no matching to be done, and no lock to be held on the communicator as
|
||||
* we know at this point that the communicator has not yet been returned to the user.
|
||||
* The only required protection is around the non_existing_communicator_pending queue.
|
||||
* We just have to push the fragment into the unexpected list of the corresponding
|
||||
* proc, or into the out-of-order (cant_match) list.
|
||||
*/
|
||||
pml_proc = &(pml_comm->procs[hdr->hdr_src]);
|
||||
|
||||
if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) {
|
||||
/* We're now expecting the next sequence number. */
|
||||
pml_proc->expected_sequence++;
|
||||
opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag );
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
/* And now the ugly part. As some fragments can be inserted in the cant_match list,
|
||||
* every time we succesfully add a fragment in the unexpected list we have to make
|
||||
* sure the next one is not in the cant_match. Otherwise, we will endup in a deadlock
|
||||
* situation as the cant_match is only checked when a new fragment is received from
|
||||
* the network.
|
||||
*/
|
||||
for(frag = (mca_pml_bfo_recv_frag_t *)opal_list_get_first(&pml_proc->frags_cant_match);
|
||||
frag != (mca_pml_bfo_recv_frag_t *)opal_list_get_end(&pml_proc->frags_cant_match);
|
||||
frag = (mca_pml_bfo_recv_frag_t *)opal_list_get_next(frag)) {
|
||||
hdr = &frag->hdr.hdr_match;
|
||||
/* If the message has the next expected seq from that proc... */
|
||||
if(hdr->hdr_seq != pml_proc->expected_sequence)
|
||||
continue;
|
||||
|
||||
opal_list_remove_item(&pml_proc->frags_cant_match, (opal_list_item_t*)frag);
|
||||
goto add_fragment_to_unexpected;
|
||||
}
|
||||
} else {
|
||||
opal_list_append( &pml_proc->frags_cant_match, (opal_list_item_t*)frag );
|
||||
}
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Tear down the PML-level state attached to a communicator: drop the
 * per-peer ompi_proc references held by the comm structure, release the
 * structure itself, and clear the communicator's back-pointer.
 */
int mca_pml_bfo_del_comm(ompi_communicator_t* comm)
{
    mca_pml_bfo_comm_t* bfo_comm = comm->c_pml_comm;
    int peer;

    /* Each proc slot holds a reference taken when the comm was added. */
    for (peer = 0; peer < comm->c_remote_group->grp_proc_count; peer++) {
        OBJ_RELEASE(bfo_comm->procs[peer].ompi_proc);
    }

    OBJ_RELEASE(comm->c_pml_comm);
    comm->c_pml_comm = NULL;

    return OMPI_SUCCESS;
}
|
||||
|
||||
|
||||
/*
|
||||
* For each proc setup a datastructure that indicates the BTLs
|
||||
* that can be used to reach the destination.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
 * Add the given procs to the PML: verify peers run the same PML, wire
 * them up through the BML, sanity-check every initialized BTL's eager
 * limit against our header size, and register the receive callbacks.
 *
 * Fix vs. original: the early "return rc;" paths taken after
 * OBJ_CONSTRUCT(&reachable, ...) (opal_bitmap_init failure and
 * mca_pml_base_pml_check_selected failure) skipped OBJ_DESTRUCT and
 * leaked the bitmap's storage.  All failure paths now funnel through
 * cleanup_and_return so the bitmap is always destructed.
 */
int mca_pml_bfo_add_procs(ompi_proc_t** procs, size_t nprocs)
{
    opal_bitmap_t reachable;
    int rc;
    opal_list_item_t *item;

    if(nprocs == 0)
        return OMPI_SUCCESS;

    OBJ_CONSTRUCT(&reachable, opal_bitmap_t);
    rc = opal_bitmap_init(&reachable, (int)nprocs);
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;  /* was "return rc": leaked the constructed bitmap */

    /*
     * JJH: Disable this in FT enabled builds since
     * we use a wrapper PML. It will cause this check to
     * return failure as all processes will return the wrapper PML
     * component in use instead of the wrapped PML component underneath.
     */
#if OPAL_ENABLE_FT_CR == 0
    /* make sure remote procs are using the same PML as us */
    if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("bfo",
                                                              procs,
                                                              nprocs))) {
        goto cleanup_and_return;  /* was "return rc": leaked the constructed bitmap */
    }
#endif

    rc = mca_bml.bml_add_procs( nprocs,
                                procs,
                                &reachable );
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

    /* Check that values supplied by all initialized btls will work
       for us.  Note that this is the list of all initialized BTLs,
       not the ones used for the just added procs.  This is a little
       overkill and inaccurate, as we may end up not using the BTL in
       question and all add_procs calls after the first one are
       duplicating an already completed check.  But the final
       initialization of the PML occurs before the final
       initialization of the BTLs, and iterating through the in-use
       BTLs requires iterating over the procs, as the BML does not
       expose all currently in use btls. */
    for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ;
         item != opal_list_get_end(&mca_btl_base_modules_initialized) ;
         item = opal_list_get_next(item)) {
        mca_btl_base_selected_module_t *sm =
            (mca_btl_base_selected_module_t*) item;
        /* Every BTL must be able to carry at least one full match header
           in an eager fragment, otherwise the protocol cannot start. */
        if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_bfo_hdr_t)) {
            opal_show_help("help-mpi-pml-bfo.txt", "eager_limit_too_small",
                           true,
                           sm->btl_component->btl_version.mca_component_name,
                           ompi_process_info.nodename,
                           sm->btl_component->btl_version.mca_component_name,
                           sm->btl_module->btl_eager_limit,
                           sm->btl_component->btl_version.mca_component_name,
                           sizeof(mca_pml_bfo_hdr_t),
                           sm->btl_component->btl_version.mca_component_name);
            rc = OMPI_ERR_BAD_PARAM;
            goto cleanup_and_return;
        }
    }

    /* TODO: Move these callback registration to another place */
    rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_MATCH,
                               mca_pml_bfo_recv_frag_callback_match,
                               NULL );
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

    rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RNDV,
                               mca_pml_bfo_recv_frag_callback_rndv,
                               NULL );
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

    rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_RGET,
                               mca_pml_bfo_recv_frag_callback_rget,
                               NULL );
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

    rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_ACK,
                               mca_pml_bfo_recv_frag_callback_ack,
                               NULL );
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

    rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_FRAG,
                               mca_pml_bfo_recv_frag_callback_frag,
                               NULL );
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

    rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_PUT,
                               mca_pml_bfo_recv_frag_callback_put,
                               NULL );
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

    rc = mca_bml.bml_register( MCA_PML_BFO_HDR_TYPE_FIN,
                               mca_pml_bfo_recv_frag_callback_fin,
                               NULL );
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

#if PML_BFO
    rc = mca_pml_bfo_register_callbacks();
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;
#endif /* PML_BFO */
    /* register error handlers */
    rc = mca_bml.bml_register_error((mca_btl_base_module_error_cb_fn_t)mca_pml_bfo_error_handler);
    if(OMPI_SUCCESS != rc)
        goto cleanup_and_return;

 cleanup_and_return:
    OBJ_DESTRUCT(&reachable);

    return rc;
}
|
||||
|
||||
/*
|
||||
* iterate through each proc and notify any PTLs associated
|
||||
* with the proc that it is/has gone away
|
||||
*/
|
||||
|
||||
/*
 * Remove procs from the PML.  The BFO PML keeps no per-proc state of
 * its own here, so the removal is delegated entirely to the BML
 * (note the argument order swap: the BML takes count first).
 */
int mca_pml_bfo_del_procs(ompi_proc_t** procs, size_t nprocs)
{
    int rc = mca_bml.bml_del_procs(nprocs, procs);
    return rc;
}
|
||||
|
||||
/*
|
||||
* diagnostics
|
||||
*/
|
||||
|
||||
/*
 * Diagnostic dump of a communicator's PML state: for every rank on the
 * communicator, print a rank banner and ask each eager-path BTL endpoint
 * to dump itself at the requested verbosity.
 *
 * @param comm     communicator whose per-proc BTL state is dumped
 * @param verbose  verbosity level forwarded to each BTL's btl_dump
 * @return OMPI_SUCCESS always
 */
int mca_pml_bfo_dump(struct ompi_communicator_t* comm, int verbose)
{
    struct mca_pml_comm_t* pml_comm = comm->c_pml_comm;
    int i;

    /* iterate through all procs on communicator */
    for( i = 0; i < (int)pml_comm->num_procs; i++ ) {
        mca_pml_bfo_comm_proc_t* proc = &pml_comm->procs[i];
        /* BML endpoint is cached on the proc under the BML endpoint tag */
        mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
        size_t n;

        opal_output(0, "[Rank %d]\n", i);
        /* dump all receive queues */

        /* dump all btls -- only the eager array is walked here */
        for(n=0; n<ep->btl_eager.arr_size; n++) {
            mca_bml_base_btl_t* bml_btl = &ep->btl_eager.bml_btls[n];
            bml_btl->btl->btl_dump(bml_btl->btl, bml_btl->btl_endpoint, verbose);
        }
    }
    return OMPI_SUCCESS;
}
|
||||
|
||||
/*
 * Completion callback attached to outgoing FIN descriptors
 * (see mca_pml_bfo_send_fin, which sets des_cbfunc to this function).
 *
 * In failover (PML_BFO) builds a failed FIN is re-posted instead of being
 * dropped; on success the descriptor's eager BML/BTL pairing is sanity-
 * checked by the failover macro.  In all builds, completion of a FIN is an
 * opportunity to drain any operations that were queued for lack of
 * resources on this BTL.
 */
static void mca_pml_bfo_fin_completion( mca_btl_base_module_t* btl,
                                        struct mca_btl_base_endpoint_t* ep,
                                        struct mca_btl_base_descriptor_t* des,
                                        int status )
{

    mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;

#if PML_BFO
    /* Failover path: a failed FIN must be retried, not lost. */
    if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
        mca_pml_bfo_repost_fin(des);
        return;
    }
    MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des);
#endif /* PML_BFO */
    /* check for pending requests */
    MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
}
|
||||
|
||||
/**
 * Send a FIN to the peer. If we fail to send it (no more available
 * fragments, or the send itself failed), this function automatically adds
 * the FIN to the list of pending FINs, which guarantees that the FIN will
 * be sent later.
 */
|
||||
/*
 * Build and send a FIN control header to `proc` over `bml_btl`.
 * On resource exhaustion (descriptor allocation or send failure) the FIN
 * is queued on mca_pml_bfo.pckt_pending via MCA_PML_BFO_ADD_FIN_TO_PENDING
 * and OMPI_ERR_OUT_OF_RESOURCE is returned; the caller need not retry.
 * In failover (PML_BFO) builds the signature carries the extra matching
 * fields (seq/restartseq/ctx/src) needed to re-identify the request.
 */
int mca_pml_bfo_send_fin( ompi_proc_t* proc,
                          mca_bml_base_btl_t* bml_btl,
                          opal_ptr_t hdr_des,
                          uint8_t order,
#if PML_BFO
                          uint32_t status,
                          uint16_t seq,
                          uint8_t restartseq,
                          uint16_t ctx, uint32_t src)
#else /* PML_BFO */
                          uint32_t status )
#endif /* PML_BFO */
{
    mca_btl_base_descriptor_t* fin;
    mca_pml_bfo_fin_hdr_t* hdr;
    int rc;

    /* High-priority descriptor, exactly one FIN header long; ownership
       passes to the BTL on send. */
    mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_bfo_fin_hdr_t),
                       MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

    if(NULL == fin) {
        /* No descriptor available: defer the FIN for a later retry. */
        MCA_PML_BFO_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    fin->des_cbfunc = mca_pml_bfo_fin_completion;
    fin->des_cbdata = NULL;

    /* fill in header */
    hdr = (mca_pml_bfo_fin_hdr_t*)fin->des_local->seg_addr.pval;
    hdr->hdr_common.hdr_flags = 0;
    hdr->hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN;
    hdr->hdr_des = hdr_des;
    hdr->hdr_fail = status;
#if PML_BFO
    /* Failover builds stash the peer on the descriptor and embed the
       matching triple so the FIN can be re-routed after a BTL failure. */
    fin->des_cbdata = proc;
    hdr->hdr_match.hdr_seq = seq;
    hdr->hdr_match.hdr_ctx = ctx;
    hdr->hdr_match.hdr_src = src;
    hdr->hdr_match.hdr_common.hdr_flags = restartseq; /* use unused hdr_flags field */
#endif /* PML_BFO */

    bfo_hdr_hton(hdr, MCA_PML_BFO_HDR_TYPE_FIN, proc);

    /* queue request */
    rc = mca_bml_base_send( bml_btl,
                            fin,
                            MCA_PML_BFO_HDR_TYPE_FIN );
    if( OPAL_LIKELY( rc >= 0 ) ) {
        /* rc == 1 means the send completed inline; resources may have been
           freed, so drain pending work now. */
        if( OPAL_LIKELY( 1 == rc ) ) {
            MCA_PML_BFO_PROGRESS_PENDING(bml_btl);
        }
        return OMPI_SUCCESS;
    }
    /* Send failed: give the descriptor back and queue the FIN for retry. */
    mca_bml_base_free(bml_btl, fin);
    MCA_PML_BFO_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
    return OMPI_ERR_OUT_OF_RESOURCE;
}
|
||||
|
||||
/*
 * Retry FIN/ACK control packets previously queued on
 * mca_pml_bfo.pckt_pending because resources were unavailable.
 * `bml_btl` identifies the BTL on which resources were just freed, not a
 * destination: packets whose peer cannot be reached over this BTL are
 * simply re-queued.  The list size is snapshotted up front so packets we
 * re-append are not re-examined in the same pass.
 */
void mca_pml_bfo_process_pending_packets(mca_bml_base_btl_t* bml_btl)
{
    mca_pml_bfo_pckt_pending_t *pckt;
    int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_bfo.pckt_pending);

    for(i = 0; i < s; i++) {
        mca_bml_base_btl_t *send_dst = NULL;
        /* Pop under the lock; the list is shared with the send path. */
        OPAL_THREAD_LOCK(&mca_pml_bfo.lock);
        pckt = (mca_pml_bfo_pckt_pending_t*)
            opal_list_remove_first(&mca_pml_bfo.pckt_pending);
        OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock);
        if(NULL == pckt)
            break;
        /* Prefer the packet's recorded BML/BTL if it maps onto the freed
           BTL; otherwise look the BTL up in the peer's eager array. */
        if(pckt->bml_btl != NULL &&
           pckt->bml_btl->btl == bml_btl->btl) {
            send_dst = pckt->bml_btl;
        } else {
            mca_bml_base_endpoint_t* endpoint =
                (mca_bml_base_endpoint_t*) pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
            send_dst = mca_bml_base_btl_array_find(
                &endpoint->btl_eager, bml_btl->btl);
        }
        if(NULL == send_dst) {
            /* Peer not reachable via this BTL: keep the packet queued. */
            OPAL_THREAD_LOCK(&mca_pml_bfo.lock);
            opal_list_append(&mca_pml_bfo.pckt_pending,
                             (opal_list_item_t*)pckt);
            OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock);
            continue;
        }

        switch(pckt->hdr.hdr_common.hdr_type) {
        case MCA_PML_BFO_HDR_TYPE_ACK:
            rc = mca_pml_bfo_recv_request_ack_send_btl(pckt->proc,
                                                       send_dst,
                                                       pckt->hdr.hdr_ack.hdr_src_req.lval,
                                                       pckt->hdr.hdr_ack.hdr_dst_req.pval,
                                                       pckt->hdr.hdr_ack.hdr_send_offset,
                                                       pckt->hdr.hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NORDMA);
            if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
                /* Still no resources: put it back and stop the pass. */
                OPAL_THREAD_LOCK(&mca_pml_bfo.lock);
                opal_list_append(&mca_pml_bfo.pckt_pending,
                                 (opal_list_item_t*)pckt);
                OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock);
                return;
            }
            break;
        case MCA_PML_BFO_HDR_TYPE_FIN:
            /* send_fin re-queues the packet itself on resource failure, so
               unlike the ACK case there is nothing to re-append here. */
            rc = mca_pml_bfo_send_fin(pckt->proc, send_dst,
                                      pckt->hdr.hdr_fin.hdr_des,
                                      pckt->order,
#if PML_BFO
                                      pckt->hdr.hdr_fin.hdr_fail,
                                      pckt->hdr.hdr_fin.hdr_match.hdr_seq,
                                      pckt->hdr.hdr_fin.hdr_match.hdr_common.hdr_flags,
                                      pckt->hdr.hdr_fin.hdr_match.hdr_ctx,
                                      pckt->hdr.hdr_fin.hdr_match.hdr_src);
#else /* PML_BFO */
                                      pckt->hdr.hdr_fin.hdr_fail);
#endif /* PML_BFO */
            if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
                return;
            }
            break;
        default:
            opal_output(0, "[%s:%d] wrong header type\n",
                        __FILE__, __LINE__);
            break;
        }
        /* We're done with this packet, return it back to the free list */
        MCA_PML_BFO_PCKT_PENDING_RETURN(pckt);
    }
}
|
||||
|
||||
/*
 * Retry deferred RDMA PUT/GET fragments queued on mca_pml_bfo.rdma_pending.
 * Each fragment records its own destination, so no BTL argument is needed.
 * The pass stops early as soon as a retry again runs out of resources;
 * the list size is snapshotted so nothing is examined twice in one pass.
 * Note: only the PUT path bumps frag->retries (presumably so the failover
 * code can compare it against rdma_put_retries_limit -- GETs are not
 * counted).
 */
void mca_pml_bfo_process_pending_rdma(void)
{
    mca_pml_bfo_rdma_frag_t* frag;
    int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_bfo.rdma_pending);

    for(i = 0; i < s; i++) {
        /* Pop under the lock; the list is shared with other paths. */
        OPAL_THREAD_LOCK(&mca_pml_bfo.lock);
        frag = (mca_pml_bfo_rdma_frag_t*)
            opal_list_remove_first(&mca_pml_bfo.rdma_pending);
        OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock);
        if(NULL == frag)
            break;
        if(frag->rdma_state == MCA_PML_BFO_RDMA_PUT) {
            frag->retries++;
            rc = mca_pml_bfo_send_request_put_frag(frag);
        } else {
            rc = mca_pml_bfo_recv_request_get_frag(frag);
        }
        if(OMPI_ERR_OUT_OF_RESOURCE == rc)
            break;
    }
}
|
||||
|
||||
|
||||
/*
 * BTL error callback registered via mca_bml.bml_register_error().
 * In failover (PML_BFO) builds a non-fatal BTL error is handed to the
 * failover machinery, which attempts recovery on another BTL; anything
 * else (or any error in a non-failover build) aborts the job.
 */
void mca_pml_bfo_error_handler(
        struct mca_btl_base_module_t* btl, int32_t flags,
        ompi_proc_t* errproc, char* btlinfo ) {
#if PML_BFO
    if (flags & MCA_BTL_ERROR_FLAGS_NONFATAL) {
        mca_pml_bfo_failover_error_handler(btl, flags, errproc, btlinfo);
        return;
    }
#endif /* PML_BFO */
    /* Fatal (or failover disabled): no recovery possible, abort. */
    ompi_rte_abort(-1, NULL);
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 0
/* Checkpoint/restart disabled at configure time: the FT hook is a no-op. */
int mca_pml_bfo_ft_event( int state ) {
    return OMPI_SUCCESS;
}
#else
/*
 * Checkpoint/restart fault-tolerance hook.  Dispatches on the CRS state
 * (CHECKPOINT / CONTINUE / RESTART_PRE / RESTART / TERM) both before and
 * after forwarding the event to the BML.  On restart (and on the second
 * CONTINUE pass when "continue like restart" is enabled) the proc table
 * is refreshed, the modex is re-exchanged, and add_procs is re-run so the
 * BTLs rebuild their endpoints.  CONTINUE arrives twice per checkpoint;
 * first_continue_pass toggles so the heavy work runs on the second pass.
 */
int mca_pml_bfo_ft_event( int state )
{
    static bool first_continue_pass = false;
    ompi_proc_t** procs = NULL;
    size_t num_procs;
    int ret, p;

    if(OPAL_CRS_CHECKPOINT == state) {
        if( opal_cr_timing_barrier_enabled ) {
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1);
            if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
                return ret;
            }
        }

        OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0);
    }
    else if(OPAL_CRS_CONTINUE == state) {
        first_continue_pass = !first_continue_pass;

        if( !first_continue_pass ) {
            if( opal_cr_timing_barrier_enabled ) {
                OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0);
                if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                    opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
                    return ret;
                }
            }
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2);
        }

        if (opal_cr_continue_like_restart && !first_continue_pass) {
            /*
             * Get a list of processes
             */
            procs = ompi_proc_all(&num_procs);
            if(NULL == procs) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            /*
             * Refresh the proc structure, and publish our proc info in the modex.
             * NOTE: Do *not* call ompi_proc_finalize as there are many places in
             * the code that point to indv. procs in this strucutre. For our
             * needs here we only need to fix up the modex, bml and pml
             * references.
             */
            if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) {
                opal_output(0,
                            "pml:bfo: ft_event(Restart): proc_refresh Failed %d",
                            ret);
                for(p = 0; p < (int)num_procs; ++p) {
                    OBJ_RELEASE(procs[p]);
                }
                free (procs);
                return ret;
            }
        }
    }
    else if(OPAL_CRS_RESTART_PRE == state ) {
        /* Nothing here */
    }
    else if(OPAL_CRS_RESTART == state ) {
        /*
         * Get a list of processes
         */
        procs = ompi_proc_all(&num_procs);
        if(NULL == procs) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /*
         * Clean out the modex information since it is invalid now.
         *    ompi_rte_purge_proc_attrs();
         * This happens at the ORTE level, so doing it again here will cause
         * some issues with socket caching.
         */


        /*
         * Refresh the proc structure, and publish our proc info in the modex.
         * NOTE: Do *not* call ompi_proc_finalize as there are many places in
         * the code that point to indv. procs in this strucutre. For our
         * needs here we only need to fix up the modex, bml and pml
         * references.
         */
        if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) {
            opal_output(0,
                        "pml:bfo: ft_event(Restart): proc_refresh Failed %d",
                        ret);
            for(p = 0; p < (int)num_procs; ++p) {
                OBJ_RELEASE(procs[p]);
            }
            free (procs);
            return ret;
        }
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }

    /* Call the BML
     * BML is expected to call ft_event in
     * - BTL(s)
     * - MPool(s)
     */
    if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) {
        opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n",
                    ret);
    }

    /* Post-BML phase: finish per-state work now that the lower layers
       have processed the event. */
    if(OPAL_CRS_CHECKPOINT == state) {
        OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1);

        if( opal_cr_timing_barrier_enabled ) {
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0);
            /* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/
        }
    }
    else if(OPAL_CRS_CONTINUE == state) {
        if( !first_continue_pass ) {
            if( opal_cr_timing_barrier_enabled ) {
                OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1);
                if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                    opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
                    return ret;
                }
            }
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3);
        }

        if (opal_cr_continue_like_restart && !first_continue_pass) {
            /*
             * Exchange the modex information once again.
             * BTLs will have republished their modex information.
             */
            if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
                return ret;
            }

            /*
             * Startup the PML stack now that the modex is running again
             * Add the new procs (BTLs redo modex recv's)
             */
            if( OMPI_SUCCESS != (ret = mca_pml_bfo_add_procs(procs, num_procs) ) ) {
                opal_output(0, "pml:bfo: ft_event(Restart): Failed in add_procs (%d)", ret);
                return ret;
            }

            /* Is this barrier necessary ? JJH */
            if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
                return ret;
            }

            if( NULL != procs ) {
                for(p = 0; p < (int)num_procs; ++p) {
                    OBJ_RELEASE(procs[p]);
                }
                free(procs);
                procs = NULL;
            }
        }
        if( !first_continue_pass ) {
            if( opal_cr_timing_barrier_enabled ) {
                OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2);
                if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                    opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
                    return ret;
                }
            }
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1);
        }
    }
    else if(OPAL_CRS_RESTART_PRE == state ) {
        /* Nothing here */
    }
    else if(OPAL_CRS_RESTART == state ) {
        /*
         * Exchange the modex information once again.
         * BTLs will have republished their modex information.
         */
        if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
            opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
            return ret;
        }

        /*
         * Startup the PML stack now that the modex is running again
         * Add the new procs (BTLs redo modex recv's)
         */
        if( OMPI_SUCCESS != (ret = mca_pml_bfo_add_procs(procs, num_procs) ) ) {
            opal_output(0, "pml:bfo: ft_event(Restart): Failed in add_procs (%d)", ret);
            return ret;
        }

        /* Is this barrier necessary ? JJH */
        if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
            opal_output(0, "pml:bfo: ft_event(Restart): Failed to fence complete");
            return ret;
        }

        if( NULL != procs ) {
            for(p = 0; p < (int)num_procs; ++p) {
                OBJ_RELEASE(procs[p]);
            }
            free(procs);
            procs = NULL;
        }
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }

    return OMPI_SUCCESS;
}
#endif /* OPAL_ENABLE_FT_CR */
|
||||
|
||||
int mca_pml_bfo_com_btl_comp(const void *v1, const void *v2)
|
||||
{
|
||||
const mca_pml_bfo_com_btl_t *b1 = (const mca_pml_bfo_com_btl_t *) v1;
|
||||
const mca_pml_bfo_com_btl_t *b2 = (const mca_pml_bfo_com_btl_t *) v2;
|
||||
|
||||
if(b1->bml_btl->btl_weight < b2->bml_btl->btl_weight)
|
||||
return 1;
|
||||
if(b1->bml_btl->btl_weight > b2->bml_btl->btl_weight)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,362 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_BFO_H
|
||||
#define MCA_PML_BFO_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/class/opal_free_list.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/pml/base/pml_base_request.h"
|
||||
#include "ompi/mca/pml/base/pml_base_bsend.h"
|
||||
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "pml_bfo_hdr.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "opal/mca/allocator/base/base.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
 * BFO PML module: global state for the "BTL failover" point-to-point
 * messaging layer (MCA parameters, free lists, and pending-work queues).
 */

struct mca_pml_bfo_t {
    mca_pml_base_module_t super;   /* base PML interface (function table) */

    int priority;                      /* component selection priority */
    int free_list_num;                 /* initial size of free list */
    int free_list_max;                 /* maximum size of free list */
    int free_list_inc;                 /* number of elements to grow free list */
    unsigned int send_pipeline_depth;  /* max outstanding send fragments */
    unsigned int recv_pipeline_depth;  /* max outstanding receive fragments */
    unsigned int rdma_put_retries_limit;  /* retry cap for failed RDMA PUTs */
    int max_rdma_per_request;
    int max_send_per_range;
    bool leave_pinned;                 /* keep RDMA registrations cached */
    int leave_pinned_pipeline;

    /* lock queue access */
    opal_mutex_t lock;

    /* free lists */
    opal_free_list_t rdma_frags;
    opal_free_list_t recv_frags;
    opal_free_list_t pending_pckts;
    opal_free_list_t buffers;
    opal_free_list_t send_ranges;

    /* list of pending operations (deferred for lack of resources) */
    opal_list_t pckt_pending;
    opal_list_t send_pending;
    opal_list_t recv_pending;
    opal_list_t rdma_pending;
    /* List of pending fragments without a matching communicator */
    opal_list_t non_existing_communicator_pending;
    bool enabled;                      /* set once the PML is enabled */
    char* allocator_name;              /* name of the buffer allocator component */
    mca_allocator_base_module_t* allocator;
    unsigned int unexpected_limit;     /* size cap for buffered unexpected msgs */
};
typedef struct mca_pml_bfo_t mca_pml_bfo_t;
|
||||
|
||||
extern mca_pml_bfo_t mca_pml_bfo;
|
||||
extern int mca_pml_bfo_output;
|
||||
|
||||
/*
|
||||
* PML interface functions.
|
||||
*/
|
||||
|
||||
extern int mca_pml_bfo_add_comm(
|
||||
struct ompi_communicator_t* comm
|
||||
);
|
||||
|
||||
extern int mca_pml_bfo_del_comm(
|
||||
struct ompi_communicator_t* comm
|
||||
);
|
||||
|
||||
extern int mca_pml_bfo_add_procs(
|
||||
struct ompi_proc_t **procs,
|
||||
size_t nprocs
|
||||
);
|
||||
|
||||
extern int mca_pml_bfo_del_procs(
|
||||
struct ompi_proc_t **procs,
|
||||
size_t nprocs
|
||||
);
|
||||
|
||||
extern int mca_pml_bfo_enable( bool enable );
|
||||
|
||||
extern int mca_pml_bfo_progress(void);
|
||||
|
||||
extern int mca_pml_bfo_iprobe( int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
int *matched,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_bfo_probe( int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_bfo_improbe( int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
int *matched,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_bfo_mprobe( int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_bfo_isend_init( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_bfo_isend( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_bfo_send( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm );
|
||||
|
||||
extern int mca_pml_bfo_irecv_init( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_bfo_irecv( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_bfo_recv( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_bfo_imrecv( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
struct ompi_message_t **message,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_bfo_mrecv( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_bfo_dump( struct ompi_communicator_t* comm,
|
||||
int verbose );
|
||||
|
||||
extern int mca_pml_bfo_start( size_t count,
|
||||
ompi_request_t** requests );
|
||||
|
||||
extern int mca_pml_bfo_ft_event( int state );
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
struct mca_pml_bfo_pckt_pending_t {
|
||||
opal_free_list_item_t super;
|
||||
ompi_proc_t* proc;
|
||||
mca_pml_bfo_hdr_t hdr;
|
||||
struct mca_bml_base_btl_t *bml_btl;
|
||||
uint8_t order;
|
||||
};
|
||||
typedef struct mca_pml_bfo_pckt_pending_t mca_pml_bfo_pckt_pending_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pml_bfo_pckt_pending_t);
|
||||
|
||||
#define MCA_PML_BFO_PCKT_PENDING_ALLOC(pckt) \
|
||||
do { \
|
||||
opal_free_list_item_t* item; \
|
||||
OPAL_FREE_LIST_WAIT(&mca_pml_bfo.pending_pckts, item); \
|
||||
pckt = (mca_pml_bfo_pckt_pending_t*)item; \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_PCKT_PENDING_RETURN(pckt) \
|
||||
do { \
|
||||
/* return packet */ \
|
||||
OPAL_FREE_LIST_RETURN(&mca_pml_bfo.pending_pckts, \
|
||||
(opal_free_list_item_t*)pckt); \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_BFO_ADD_FIN_TO_PENDING(P, D, B, O, S) \
|
||||
do { \
|
||||
mca_pml_bfo_pckt_pending_t *_pckt; \
|
||||
\
|
||||
MCA_PML_BFO_PCKT_PENDING_ALLOC(_pckt); \
|
||||
_pckt->hdr.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_FIN; \
|
||||
_pckt->hdr.hdr_fin.hdr_des = (D); \
|
||||
_pckt->hdr.hdr_fin.hdr_fail = (S); \
|
||||
_pckt->proc = (P); \
|
||||
_pckt->bml_btl = (B); \
|
||||
_pckt->order = (O); \
|
||||
OPAL_THREAD_LOCK(&mca_pml_bfo.lock); \
|
||||
opal_list_append(&mca_pml_bfo.pckt_pending, \
|
||||
(opal_list_item_t*)_pckt); \
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); \
|
||||
} while(0)
|
||||
|
||||
|
||||
int mca_pml_bfo_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||
#if PML_BFO
|
||||
opal_ptr_t hdr_des, uint8_t order, uint32_t status,
|
||||
uint16_t seq, uint8_t reqseq, uint16_t ctx, uint32_t src);
|
||||
#else /* PML_BFO */
|
||||
opal_ptr_t hdr_des, uint8_t order, uint32_t status);
|
||||
#endif /* PML_BFO */
|
||||
|
||||
/* This function tries to resend FIN/ACK packets from pckt_pending queue.
|
||||
* Packets are added to the queue when sending of FIN or ACK is failed due to
|
||||
* resource unavailability. bml_btl passed to the function doesn't represents
|
||||
* packet's destination, it represents BTL on which resource was freed, so only
|
||||
* this BTL should be considered for resending packets */
|
||||
void mca_pml_bfo_process_pending_packets(mca_bml_base_btl_t* bml_btl);
|
||||
|
||||
/* This function retries failed PUT/GET operations on frag. When RDMA operation
|
||||
* cannot be accomplished for some reason, frag is put on the rdma_pending list.
|
||||
* Later the operation is retried. The destination of RDMA operation is stored
|
||||
* inside the frag structure */
|
||||
void mca_pml_bfo_process_pending_rdma(void);
|
||||
|
||||
#define MCA_PML_BFO_PROGRESS_PENDING(bml_btl) \
|
||||
do { \
|
||||
if(opal_list_get_size(&mca_pml_bfo.pckt_pending)) \
|
||||
mca_pml_bfo_process_pending_packets(bml_btl); \
|
||||
if(opal_list_get_size(&mca_pml_bfo.recv_pending)) \
|
||||
mca_pml_bfo_recv_request_process_pending(); \
|
||||
if(opal_list_get_size(&mca_pml_bfo.send_pending)) \
|
||||
mca_pml_bfo_send_request_process_pending(bml_btl); \
|
||||
if(opal_list_get_size(&mca_pml_bfo.rdma_pending)) \
|
||||
mca_pml_bfo_process_pending_rdma(); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Compute the total number of bytes on supplied descriptor
|
||||
*/
|
||||
/*
 * Compute the total payload byte count of a descriptor whose segments are
 * laid out with stride `seg_size`: sum every segment's seg_len, minus the
 * `hdrlen` header bytes.  The header is subtracted up front by starting
 * the unsigned accumulator at (size_t)-hdrlen (deliberate wrap-around).
 */
static inline int mca_pml_bfo_compute_segment_length (size_t seg_size, void *segments, size_t count,
                                                      size_t hdrlen) {
    size_t idx;
    size_t total = -hdrlen;  /* unsigned wrap: pre-subtract the header */

    for (idx = 0; idx < count; ++idx) {
        const mca_btl_base_segment_t *seg =
            (const mca_btl_base_segment_t *)((char *) segments + idx * seg_size);
        total += seg->seg_len;
    }

    return total;
}
|
||||
|
||||
/*
 * Same as mca_pml_bfo_compute_segment_length but for a plain contiguous
 * array of base segments: total payload = sum(seg_len) - hdrlen, with the
 * header pre-subtracted via unsigned wrap-around of the accumulator.
 */
static inline int mca_pml_bfo_compute_segment_length_base (mca_btl_base_segment_t *segments,
                                                           size_t count, size_t hdrlen) {
    size_t idx;
    size_t total = -hdrlen;  /* unsigned wrap: pre-subtract the header */

    for (idx = 0; idx < count; ++idx) {
        total += segments[idx].seg_len;
    }

    return total;
}
|
||||
|
||||
/* represent BTL chosen for sending request */
|
||||
struct mca_pml_bfo_com_btl_t {
|
||||
mca_bml_base_btl_t *bml_btl;
|
||||
struct mca_mpool_base_registration_t* btl_reg;
|
||||
size_t length;
|
||||
};
|
||||
typedef struct mca_pml_bfo_com_btl_t mca_pml_bfo_com_btl_t;
|
||||
|
||||
int mca_pml_bfo_com_btl_comp(const void *v1, const void *v2);
|
||||
|
||||
/* Calculate what percentage of a message to send through each BTL according to
|
||||
* relative weight */
|
||||
/*
 * Split `size` bytes across `num_btls` BTLs in proportion to each BTL's
 * btl_weight / weight_total, filling btls[i].length.  BTLs are first
 * sorted by descending weight; a BTL whose share would not exceed its
 * eager limit simply takes all remaining bytes.  Rounding leftovers are
 * assigned to the heaviest BTL so the lengths always sum to `size`.
 */
static inline void
mca_pml_bfo_calc_weighted_length( mca_pml_bfo_com_btl_t *btls, int num_btls, size_t size,
                                  double weight_total )
{
    int i;
    size_t length_left;

    /* shortcut for common case for only one BTL */
    if( OPAL_LIKELY(1 == num_btls) ) {
        btls[0].length = size;
        return;
    }

    /* sort BTLs according of their weights so BTLs with smaller weight will
     * not hijack all of the traffic */
    qsort( btls, num_btls, sizeof(mca_pml_bfo_com_btl_t),
           mca_pml_bfo_com_btl_comp );

    for(length_left = size, i = 0; i < num_btls; i++) {
        mca_bml_base_btl_t* bml_btl = btls[i].bml_btl;
        size_t length = 0;
        if( OPAL_UNLIKELY(0 != length_left) ) {
            /* Weighted share, but a BTL whose remaining bytes fit under its
             * eager limit just takes everything that is left. */
            length = (length_left > bml_btl->btl->btl_eager_limit)?
                ((size_t)(size * (bml_btl->btl_weight / weight_total))) :
                length_left;

            /* Clamp: floating-point share may overshoot the remainder. */
            if(length > length_left)
                length = length_left;
            length_left -= length;
        }
        btls[i].length = length;
    }

    /* account for rounding errors */
    btls[0].length += length_left;
}
|
||||
|
||||
#endif
|
@ -1,100 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <string.h>
|
||||
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_comm.h"
|
||||
|
||||
|
||||
|
||||
static void mca_pml_bfo_comm_proc_construct(mca_pml_bfo_comm_proc_t* proc)
|
||||
{
|
||||
proc->expected_sequence = 1;
|
||||
proc->ompi_proc = NULL;
|
||||
proc->send_sequence = 0;
|
||||
OBJ_CONSTRUCT(&proc->frags_cant_match, opal_list_t);
|
||||
OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t);
|
||||
OBJ_CONSTRUCT(&proc->unexpected_frags, opal_list_t);
|
||||
}
|
||||
|
||||
|
||||
static void mca_pml_bfo_comm_proc_destruct(mca_pml_bfo_comm_proc_t* proc)
|
||||
{
|
||||
OBJ_DESTRUCT(&proc->frags_cant_match);
|
||||
OBJ_DESTRUCT(&proc->specific_receives);
|
||||
OBJ_DESTRUCT(&proc->unexpected_frags);
|
||||
}
|
||||
|
||||
|
||||
static OBJ_CLASS_INSTANCE(
|
||||
mca_pml_bfo_comm_proc_t,
|
||||
opal_object_t,
|
||||
mca_pml_bfo_comm_proc_construct,
|
||||
mca_pml_bfo_comm_proc_destruct);
|
||||
|
||||
|
||||
static void mca_pml_bfo_comm_construct(mca_pml_bfo_comm_t* comm)
|
||||
{
|
||||
OBJ_CONSTRUCT(&comm->wild_receives, opal_list_t);
|
||||
OBJ_CONSTRUCT(&comm->matching_lock, opal_mutex_t);
|
||||
comm->recv_sequence = 0;
|
||||
comm->procs = NULL;
|
||||
comm->last_probed = 0;
|
||||
comm->num_procs = 0;
|
||||
}
|
||||
|
||||
|
||||
static void mca_pml_bfo_comm_destruct(mca_pml_bfo_comm_t* comm)
|
||||
{
|
||||
size_t i;
|
||||
for(i=0; i<comm->num_procs; i++)
|
||||
OBJ_DESTRUCT((&comm->procs[i]));
|
||||
if(NULL != comm->procs)
|
||||
free(comm->procs);
|
||||
OBJ_DESTRUCT(&comm->wild_receives);
|
||||
OBJ_DESTRUCT(&comm->matching_lock);
|
||||
}
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_pml_bfo_comm_t,
|
||||
opal_object_t,
|
||||
mca_pml_bfo_comm_construct,
|
||||
mca_pml_bfo_comm_destruct);
|
||||
|
||||
|
||||
int mca_pml_bfo_comm_init_size(mca_pml_bfo_comm_t* comm, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
/* send message sequence-number support - sender side */
|
||||
comm->procs = (mca_pml_bfo_comm_proc_t*)malloc(sizeof(mca_pml_bfo_comm_proc_t)*size);
|
||||
if(NULL == comm->procs) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for(i=0; i<size; i++) {
|
||||
OBJ_CONSTRUCT(comm->procs+i, mca_pml_bfo_comm_proc_t);
|
||||
}
|
||||
comm->num_procs = size;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_PML_BFO_COMM_H
|
||||
#define MCA_PML_BFO_COMM_H
|
||||
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
||||
struct mca_pml_bfo_comm_proc_t {
|
||||
opal_object_t super;
|
||||
uint16_t expected_sequence; /**< send message sequence number - receiver side */
|
||||
struct ompi_proc_t* ompi_proc;
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
volatile int32_t send_sequence; /**< send side sequence number */
|
||||
#else
|
||||
int32_t send_sequence; /**< send side sequence number */
|
||||
#endif
|
||||
opal_list_t frags_cant_match; /**< out-of-order fragment queues */
|
||||
opal_list_t specific_receives; /**< queues of unmatched specific receives */
|
||||
opal_list_t unexpected_frags; /**< unexpected fragment queues */
|
||||
};
|
||||
typedef struct mca_pml_bfo_comm_proc_t mca_pml_bfo_comm_proc_t;
|
||||
|
||||
|
||||
/**
|
||||
* Cached on ompi_communicator_t to hold queues/state
|
||||
* used by the PML<->PTL interface for matching logic.
|
||||
*/
|
||||
struct mca_pml_comm_t {
|
||||
opal_object_t super;
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
volatile uint32_t recv_sequence; /**< recv request sequence number - receiver side */
|
||||
#else
|
||||
uint32_t recv_sequence; /**< recv request sequence number - receiver side */
|
||||
#endif
|
||||
opal_mutex_t matching_lock; /**< matching lock */
|
||||
opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */
|
||||
mca_pml_bfo_comm_proc_t* procs;
|
||||
size_t num_procs;
|
||||
size_t last_probed;
|
||||
};
|
||||
typedef struct mca_pml_comm_t mca_pml_bfo_comm_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_bfo_comm_t);
|
||||
|
||||
|
||||
/**
|
||||
* Initialize an instance of mca_pml_bfo_comm_t based on the communicator size.
|
||||
*
|
||||
* @param comm Instance of mca_pml_bfo_comm_t
|
||||
* @param size Size of communicator
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*/
|
||||
|
||||
extern int mca_pml_bfo_comm_init_size(mca_pml_bfo_comm_t* comm, size_t size);
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
||||
|
@ -1,274 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "mpi.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/pml/base/pml_base_bsend.h"
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_hdr.h"
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "pml_bfo_rdmafrag.h"
|
||||
#include "pml_bfo_recvfrag.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "pml_bfo_component.h"
|
||||
#include "opal/mca/allocator/base/base.h"
|
||||
#include "opal/runtime/opal_params.h"
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_pml_bfo_pckt_pending_t,
|
||||
ompi_free_list_item_t,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
static int mca_pml_bfo_component_register(void);
|
||||
static int mca_pml_bfo_component_open(void);
|
||||
static int mca_pml_bfo_component_close(void);
|
||||
static mca_pml_base_module_t*
|
||||
mca_pml_bfo_component_init( int* priority, bool enable_progress_threads,
|
||||
bool enable_mpi_threads );
|
||||
static int mca_pml_bfo_component_fini(void);
|
||||
int mca_pml_bfo_output = 0;
|
||||
static int mca_pml_bfo_verbose = 0;
|
||||
|
||||
mca_pml_base_component_2_0_0_t mca_pml_bfo_component = {
|
||||
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
.pmlm_version = {
|
||||
MCA_PML_BASE_VERSION_2_0_0,
|
||||
|
||||
.mca_component_name = "bfo",
|
||||
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION),
|
||||
.mca_open_component = mca_pml_bfo_component_open,
|
||||
.mca_close_component = mca_pml_bfo_component_close,
|
||||
.mca_register_component_params = mca_pml_bfo_component_register,
|
||||
},
|
||||
.pmlm_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
.pmlm_init = mca_pml_bfo_component_init,
|
||||
.pmlm_finalize = mca_pml_bfo_component_fini,
|
||||
};
|
||||
|
||||
void *mca_pml_bfo_seg_alloc( struct mca_mpool_base_module_t* mpool,
|
||||
size_t* size,
|
||||
mca_mpool_base_registration_t** registration);
|
||||
|
||||
void mca_pml_bfo_seg_free( struct mca_mpool_base_module_t* mpool,
|
||||
void* segment );
|
||||
|
||||
static inline int mca_pml_bfo_param_register_int(
|
||||
const char* param_name,
|
||||
int default_value,
|
||||
int *storage)
|
||||
{
|
||||
*storage = default_value;
|
||||
(void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version, param_name,
|
||||
NULL, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, storage);
|
||||
|
||||
return *storage;
|
||||
}
|
||||
|
||||
static inline unsigned int mca_pml_bfo_param_register_uint(
|
||||
const char* param_name,
|
||||
unsigned int default_value,
|
||||
unsigned int *storage)
|
||||
{
|
||||
*storage = default_value;
|
||||
(void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version, param_name,
|
||||
NULL, MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, storage);
|
||||
|
||||
return *storage;
|
||||
}
|
||||
|
||||
static int mca_pml_bfo_component_register(void)
|
||||
{
|
||||
int default_priority;
|
||||
|
||||
#if PML_BFO
|
||||
default_priority = 5;
|
||||
#else /* PML_BFO */
|
||||
default_priority = 20;
|
||||
mca_pml_bfo_param_register_int("priority", 20);
|
||||
#endif /* PML_BFO */
|
||||
|
||||
(void) mca_pml_bfo_param_register_int("verbose", 0, &mca_pml_bfo_verbose);
|
||||
(void) mca_pml_bfo_param_register_int("free_list_num", 4, &mca_pml_bfo.free_list_num);
|
||||
(void) mca_pml_bfo_param_register_int("free_list_max", -1, &mca_pml_bfo.free_list_max);
|
||||
(void) mca_pml_bfo_param_register_int("free_list_inc", 64, &mca_pml_bfo.free_list_inc);
|
||||
(void) mca_pml_bfo_param_register_int("priority", default_priority, &mca_pml_bfo.priority);
|
||||
(void) mca_pml_bfo_param_register_uint("send_pipeline_depth", 3, &mca_pml_bfo.send_pipeline_depth);
|
||||
(void) mca_pml_bfo_param_register_uint("recv_pipeline_depth", 4, &mca_pml_bfo.recv_pipeline_depth);
|
||||
(void) mca_pml_bfo_param_register_uint("rdma_put_retries_limit", 5, &mca_pml_bfo.rdma_put_retries_limit);
|
||||
(void) mca_pml_bfo_param_register_int("max_rdma_per_request", 4, &mca_pml_bfo.max_rdma_per_request);
|
||||
(void) mca_pml_bfo_param_register_int("max_send_per_range", 4, &mca_pml_bfo.max_send_per_range);
|
||||
(void) mca_pml_bfo_param_register_uint("unexpected_limit", 128, &mca_pml_bfo.unexpected_limit);
|
||||
|
||||
mca_pml_bfo.allocator_name = "bucket";
|
||||
(void) mca_base_component_var_register(&mca_pml_bfo_component.pmlm_version,
|
||||
"allocator",
|
||||
"Name of allocator component for unexpected messages",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_pml_bfo.allocator_name);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int mca_pml_bfo_component_open(void)
|
||||
{
|
||||
mca_pml_bfo_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_pml_bfo_output, mca_pml_bfo_verbose);
|
||||
|
||||
mca_pml_bfo.enabled = false;
|
||||
return mca_base_framework_open(&ompi_bml_base_framework, 0);
|
||||
}
|
||||
|
||||
|
||||
static int mca_pml_bfo_component_close(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (OMPI_SUCCESS != (rc = mca_base_framework_close(&ompi_bml_base_framework))) {
|
||||
return rc;
|
||||
}
|
||||
opal_output_close(mca_pml_bfo_output);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static mca_pml_base_module_t*
|
||||
mca_pml_bfo_component_init( int* priority,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads )
|
||||
{
|
||||
mca_allocator_base_component_t* allocator_component;
|
||||
|
||||
opal_output_verbose( 10, mca_pml_bfo_output,
|
||||
"in bfo, my priority is %d\n", mca_pml_bfo.priority);
|
||||
|
||||
if((*priority) > mca_pml_bfo.priority) {
|
||||
*priority = mca_pml_bfo.priority;
|
||||
return NULL;
|
||||
}
|
||||
*priority = mca_pml_bfo.priority;
|
||||
|
||||
allocator_component = mca_allocator_component_lookup( mca_pml_bfo.allocator_name );
|
||||
if(NULL == allocator_component) {
|
||||
opal_output(0, "mca_pml_bfo_component_init: can't find allocator: %s\n", mca_pml_bfo.allocator_name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mca_pml_bfo.allocator = allocator_component->allocator_init(true,
|
||||
mca_pml_bfo_seg_alloc,
|
||||
mca_pml_bfo_seg_free, NULL);
|
||||
if(NULL == mca_pml_bfo.allocator) {
|
||||
opal_output(0, "mca_pml_bfo_component_init: unable to initialize allocator\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads,
|
||||
enable_mpi_threads)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Set this here (vs in component_open()) because
|
||||
opal_leave_pinned* may have been set after MCA params were
|
||||
read (e.g., by the openib btl) */
|
||||
mca_pml_bfo.leave_pinned = (1 == opal_leave_pinned);
|
||||
mca_pml_bfo.leave_pinned_pipeline = (int) opal_leave_pinned_pipeline;
|
||||
|
||||
return &mca_pml_bfo.super;
|
||||
}
|
||||
|
||||
int mca_pml_bfo_component_fini(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* Shutdown BML */
|
||||
if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize()))
|
||||
return rc;
|
||||
|
||||
if(!mca_pml_bfo.enabled)
|
||||
return OMPI_SUCCESS; /* never selected.. return success.. */
|
||||
mca_pml_bfo.enabled = false; /* not anymore */
|
||||
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.rdma_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.pckt_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.recv_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.send_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.non_existing_communicator_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.buffers);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.pending_pckts);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.recv_frags);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.rdma_frags);
|
||||
OBJ_DESTRUCT(&mca_pml_bfo.lock);
|
||||
|
||||
if(OMPI_SUCCESS != (rc = mca_pml_bfo.allocator->alc_finalize(mca_pml_bfo.allocator))) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (mca_pml_base_send_requests.fl_num_allocated !=
|
||||
mca_pml_base_send_requests.super.opal_list_length) {
|
||||
opal_output(0, "bfo send requests: %d allocated %d returned\n",
|
||||
mca_pml_base_send_requests.fl_num_allocated,
|
||||
mca_pml_base_send_requests.super.opal_list_length);
|
||||
}
|
||||
if (mca_pml_base_recv_requests.fl_num_allocated !=
|
||||
mca_pml_base_recv_requests.super.opal_list_length) {
|
||||
opal_output(0, "bfo recv requests: %d allocated %d returned\n",
|
||||
mca_pml_base_recv_requests.fl_num_allocated,
|
||||
mca_pml_base_recv_requests.super.opal_list_length);
|
||||
}
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
void *mca_pml_bfo_seg_alloc( struct mca_mpool_base_module_t* mpool,
|
||||
size_t* size,
|
||||
mca_mpool_base_registration_t** registration) {
|
||||
return malloc(*size);
|
||||
}
|
||||
|
||||
void mca_pml_bfo_seg_free( struct mca_mpool_base_module_t* mpool,
|
||||
void* segment ) {
|
||||
free(segment);
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_BFO_COMPONENT_H
|
||||
#define MCA_PML_BFO_COMPONENT_H
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* PML module functions.
|
||||
*/
|
||||
OMPI_MODULE_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_bfo_component;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,157 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012-2015 NVIDIA Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/prefetch.h"
|
||||
#include "opal/mca/btl/btl.h"
|
||||
#include "opal/mca/mpool/mpool.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_hdr.h"
|
||||
#include "pml_bfo_rdmafrag.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/memchecker.h"
|
||||
|
||||
size_t mca_pml_bfo_rdma_cuda_btls(
|
||||
mca_bml_base_endpoint_t* bml_endpoint,
|
||||
unsigned char* base,
|
||||
size_t size,
|
||||
mca_pml_bfo_com_btl_t* rdma_btls);
|
||||
|
||||
int mca_pml_bfo_cuda_need_buffers(void * rreq,
|
||||
mca_btl_base_module_t* btl);
|
||||
|
||||
/**
|
||||
* Handle the CUDA buffer.
|
||||
*/
|
||||
int mca_pml_bfo_send_request_start_cuda(mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size) {
|
||||
int rc;
|
||||
sendreq->req_send.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
|
||||
if (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
|
||||
unsigned char *base;
|
||||
opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base );
|
||||
/* Set flag back */
|
||||
sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
|
||||
if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_bfo_rdma_cuda_btls(
|
||||
sendreq->req_endpoint,
|
||||
base,
|
||||
sendreq->req_send.req_bytes_packed,
|
||||
sendreq->req_rdma))) {
|
||||
rc = mca_pml_bfo_send_request_start_rdma(sendreq, bml_btl,
|
||||
sendreq->req_send.req_bytes_packed);
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
||||
mca_pml_bfo_free_rdma_resources(sendreq);
|
||||
}
|
||||
} else {
|
||||
if (bml_btl->btl_flags & MCA_BTL_FLAGS_CUDA_PUT) {
|
||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size,
|
||||
MCA_PML_BFO_HDR_FLAGS_CONTIG);
|
||||
} else {
|
||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Do not send anything with first rendezvous message as copying GPU
|
||||
* memory into RNDV message is expensive. */
|
||||
sendreq->req_send.req_base.req_convertor.flags |= CONVERTOR_CUDA;
|
||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, 0, 0);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
size_t mca_pml_bfo_rdma_cuda_btls(
|
||||
mca_bml_base_endpoint_t* bml_endpoint,
|
||||
unsigned char* base,
|
||||
size_t size,
|
||||
mca_pml_bfo_com_btl_t* rdma_btls)
|
||||
{
|
||||
int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send);
|
||||
double weight_total = 0;
|
||||
int num_btls_used = 0, n;
|
||||
|
||||
/* shortcut when there are no rdma capable btls */
|
||||
if(num_btls == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check to see if memory is registered */
|
||||
for(n = 0; n < num_btls && num_btls_used < mca_pml_bfo.max_rdma_per_request;
|
||||
n++) {
|
||||
mca_bml_base_btl_t* bml_btl =
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n);
|
||||
|
||||
if (bml_btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET) {
|
||||
mca_mpool_base_registration_t* reg = NULL;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;
|
||||
|
||||
if( NULL != btl_mpool ) {
|
||||
/* register the memory */
|
||||
btl_mpool->mpool_register(btl_mpool, base, size, 0, ®);
|
||||
}
|
||||
|
||||
if(NULL == reg)
|
||||
continue;
|
||||
|
||||
rdma_btls[num_btls_used].bml_btl = bml_btl;
|
||||
rdma_btls[num_btls_used].btl_reg = reg;
|
||||
weight_total += bml_btl->btl_weight;
|
||||
num_btls_used++;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we don't use leave_pinned and all BTLs that already have this memory
|
||||
* registered amount to less then half of available bandwidth - fall back to
|
||||
* pipeline protocol */
|
||||
if(0 == num_btls_used || (!mca_pml_bfo.leave_pinned && weight_total < 0.5))
|
||||
return 0;
|
||||
|
||||
mca_pml_bfo_calc_weighted_length(rdma_btls, num_btls_used, size,
|
||||
weight_total);
|
||||
|
||||
return num_btls_used;
|
||||
}
|
||||
|
||||
int mca_pml_bfo_cuda_need_buffers(void * rreq,
|
||||
mca_btl_base_module_t* btl)
|
||||
{
|
||||
mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)rreq;
|
||||
if ((recvreq->req_recv.req_base.req_convertor.flags & CONVERTOR_CUDA) &&
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET)) {
|
||||
recvreq->req_recv.req_base.req_convertor.flags &= ~CONVERTOR_CUDA;
|
||||
if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) {
|
||||
recvreq->req_recv.req_base.req_convertor.flags |= CONVERTOR_CUDA;
|
||||
return true;
|
||||
} else {
|
||||
recvreq->req_recv.req_base.req_convertor.flags |= CONVERTOR_CUDA;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,398 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* Functions that implement failover capabilities.
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_BFO_FAILOVER_H
|
||||
#define MCA_PML_BFO_FAILOVER_H
|
||||
|
||||
#include "opal/mca/btl/btl.h"
|
||||
#include "pml_bfo_hdr.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
bool mca_pml_bfo_is_duplicate_msg(mca_pml_bfo_comm_proc_t* proc,
|
||||
mca_pml_bfo_match_hdr_t *hdr);
|
||||
bool mca_pml_bfo_is_duplicate_fin(mca_pml_bfo_hdr_t* hdr, mca_btl_base_descriptor_t* rdma,
|
||||
mca_btl_base_module_t* btl);
|
||||
|
||||
mca_pml_bfo_recv_request_t* mca_pml_bfo_get_request(mca_pml_bfo_match_hdr_t *hdr);
|
||||
|
||||
void mca_pml_bfo_send_request_restart(mca_pml_bfo_send_request_t* sendreq,
|
||||
bool repost, mca_btl_base_tag_t tag);
|
||||
void mca_pml_bfo_send_request_rndvrestartnotify(mca_pml_bfo_send_request_t* sendreq,
|
||||
bool repost, mca_btl_base_tag_t tag, int status,
|
||||
mca_btl_base_module_t* btl);
|
||||
|
||||
void
|
||||
mca_pml_bfo_rndvrestartnotify_completion(mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status);
|
||||
void
|
||||
mca_pml_bfo_check_recv_ctl_completion_status(mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status);
|
||||
|
||||
/* Reset a receive request to the beginning */
|
||||
void mca_pml_bfo_recv_request_reset(mca_pml_bfo_recv_request_t* recvreq);
|
||||
/* Notify sender that receiver detected an error */
|
||||
void mca_pml_bfo_recv_request_recverrnotify(mca_pml_bfo_recv_request_t* recvreq,
|
||||
mca_btl_base_tag_t tag, int status);
|
||||
/* Ack the RNDVRESTARTNOTIFY message */
|
||||
void mca_pml_bfo_recv_request_rndvrestartack(mca_pml_bfo_recv_request_t* recvreq,
|
||||
mca_btl_base_tag_t tag, int status,
|
||||
mca_btl_base_module_t* btl);
|
||||
/* Nack the RNDVRESTARTNOTIFY message */
|
||||
void mca_pml_bfo_recv_request_rndvrestartnack(mca_btl_base_descriptor_t* olddes,
|
||||
ompi_proc_t* ompi_proc, bool repost);
|
||||
|
||||
void mca_pml_bfo_recv_restart_completion(mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status);
|
||||
void mca_pml_bfo_failover_error_handler(struct mca_btl_base_module_t* btl,
|
||||
int32_t flags, ompi_proc_t *errproc, char *btlname);
|
||||
void mca_pml_bfo_repost_match_fragment(struct mca_btl_base_descriptor_t* des);
|
||||
void mca_pml_bfo_repost_fin(struct mca_btl_base_descriptor_t* des);
|
||||
|
||||
void mca_pml_bfo_map_out_btl(struct mca_btl_base_module_t* btl,
|
||||
ompi_proc_t *errproc, char *btlname);
|
||||
|
||||
extern void mca_pml_bfo_map_out( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
int mca_pml_bfo_register_callbacks(void);
|
||||
|
||||
void mca_pml_bfo_update_rndv_fields(mca_pml_bfo_hdr_t* hdr,
|
||||
mca_pml_bfo_send_request_t*, char *type);
|
||||
|
||||
void mca_pml_bfo_update_bml_btl(mca_bml_base_btl_t** bml_btl, mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_descriptor_t* des);
|
||||
|
||||
void mca_pml_bfo_find_recvreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_recv_request_t* recvreq,
|
||||
char* type);
|
||||
|
||||
void mca_pml_bfo_find_sendreq_eager_bml_btl(mca_bml_base_btl_t** bml_btl,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
char* type);
|
||||
|
||||
void mca_pml_bfo_find_sendreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
char* type);
|
||||
|
||||
void mca_pml_bfo_update_eager_bml_btl_recv_ctl(mca_bml_base_btl_t** bml_btl,
|
||||
mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_descriptor_t* des);
|
||||
void mca_pml_bfo_find_recvreq_rdma_bml_btl(mca_bml_base_btl_t** bml_btl,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_recv_request_t* recvreq,
|
||||
char* type);
|
||||
|
||||
bool mca_pml_bfo_rndv_completion_status_error(struct mca_btl_base_descriptor_t* des,
|
||||
mca_pml_bfo_send_request_t* sendreq);
|
||||
void mca_pml_bfo_send_ctl_completion_status_error(struct mca_btl_base_descriptor_t* des);
|
||||
|
||||
|
||||
void mca_pml_bfo_completion_sendreq_has_error(mca_pml_bfo_send_request_t* sendreq,
|
||||
int status,
|
||||
mca_btl_base_module_t* btl,
|
||||
int type,
|
||||
char *description);
|
||||
/**
|
||||
* Four new callbacks for the four new message types.
|
||||
*/
|
||||
extern void mca_pml_bfo_recv_frag_callback_rndvrestartnotify( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_rndvrestartack( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_rndvrestartnack( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_recverrnotify( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
/**
|
||||
* A bunch of macros to help isolate failover code from regular ob1 code.
|
||||
*/
|
||||
|
||||
/* Drop any ACK fragments if request is in error state. Do not want
|
||||
* to initiate any more activity. */
|
||||
#define MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq) \
|
||||
if( OPAL_UNLIKELY((sendreq)->req_error)) { \
|
||||
opal_output_verbose(20, mca_pml_bfo_output, \
|
||||
"ACK: received: dropping because request in error, " \
|
||||
"PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \
|
||||
(uint16_t)(sendreq)->req_send.req_base.req_sequence, \
|
||||
(sendreq)->req_restartseq, \
|
||||
(void *)(sendreq), (sendreq)->req_recv.pval, \
|
||||
(sendreq)->req_send.req_base.req_peer); \
|
||||
return; \
|
||||
}
|
||||
|
||||
/* Drop any FRAG fragments if request is in error state. Do not want
|
||||
* to initiate any more activity. */
|
||||
#define MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq) \
|
||||
if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \
|
||||
opal_output_verbose(20, mca_pml_bfo_output, \
|
||||
"FRAG: received: dropping because request in error, " \
|
||||
"PML=%d, src_req=%p, dst_req=%p, peer=%d, offset=%d", \
|
||||
(uint16_t)(recvreq)->req_msgseq, \
|
||||
(recvreq)->remote_req_send.pval, \
|
||||
(void *)(recvreq), \
|
||||
(recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, \
|
||||
(int)hdr->hdr_frag.hdr_frag_offset); \
|
||||
return; \
|
||||
}
|
||||
|
||||
/* Drop any PUT fragments if request is in error state. Do not want
|
||||
* to initiate any more activity. */
|
||||
#define MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq) \
|
||||
if( OPAL_UNLIKELY((sendreq)->req_error)) { \
|
||||
opal_output_verbose(20, mca_pml_bfo_output, \
|
||||
"PUT: received: dropping because request in error, " \
|
||||
"PML=%d, src_req=%p, dst_req=%p, peer=%d", \
|
||||
(uint16_t)(sendreq)->req_send.req_base.req_sequence, \
|
||||
(void *)(sendreq), (sendreq)->req_recv.pval, \
|
||||
(sendreq)->req_send.req_base.req_peer); \
|
||||
return; \
|
||||
}
|
||||
|
||||
/**
|
||||
* Macros for pml_bfo_recvreq.c file.
|
||||
*/
|
||||
|
||||
/* This can happen if a FIN message arrives after the request was
|
||||
* marked in error. So, just drop the message. Note that the status
|
||||
* field is not being checked. That is because the status field is the
|
||||
* value returned in the FIN hdr.hdr_fail field and may be used for
|
||||
* other things. Note that we allow the various fields to be updated
|
||||
* in case this actually completes the request and the sending side
|
||||
* thinks it is done. */
|
||||
#define MCA_PML_BFO_ERROR_CHECK_ON_FIN_FOR_PUT(recvreq) \
|
||||
if( OPAL_UNLIKELY((recvreq)->req_errstate)) { \
|
||||
opal_output_verbose(20, mca_pml_bfo_output, \
|
||||
"FIN: received on broken request, skipping, " \
|
||||
"PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \
|
||||
(recvreq)->req_msgseq, (recvreq)->req_restartseq, \
|
||||
(recvreq)->remote_req_send.pval, (void *)(recvreq), \
|
||||
(recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \
|
||||
/* Even though in error, it still might complete. */ \
|
||||
recv_request_pml_complete_check(recvreq); \
|
||||
return; \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq) \
|
||||
if ((recvreq)->req_errstate) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"RDMA read: completion failed, error already seen, " \
|
||||
"PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \
|
||||
(recvreq)->req_msgseq, (recvreq)->req_restartseq, \
|
||||
(unsigned long)(recvreq)->remote_req_send.pval, \
|
||||
(unsigned long)(recvreq), \
|
||||
(recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \
|
||||
return; \
|
||||
} else { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"RDMA read: completion failed, sending RECVERRNOTIFY to " \
|
||||
"sender, PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, peer=%d", \
|
||||
(recvreq)->req_msgseq, (recvreq)->req_restartseq, \
|
||||
(unsigned long)(recvreq)->remote_req_send.pval, \
|
||||
(unsigned long)(recvreq), \
|
||||
(recvreq)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \
|
||||
mca_pml_bfo_recv_request_recverrnotify(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, status); \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_SECOND_ERROR_CHECK_ON_RDMA_READ_COMPLETION(recvreq, status, btl) \
|
||||
/* See if the request has received a RNDVRESTARTNOTIFY */ \
|
||||
if( OPAL_UNLIKELY(recvreq->req_errstate)) { \
|
||||
if (recvreq->req_errstate & RECVREQ_RNDVRESTART_RECVED) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"RDMA read: completion: recvreq has error, outstanding events=%d " \
|
||||
"PML=%d, RQS=%d, src_req=%lx, dst_req=%lx, status=%d, peer=%d", \
|
||||
recvreq->req_events, recvreq->req_msgseq, recvreq->req_restartseq, \
|
||||
(unsigned long)recvreq->remote_req_send.pval, \
|
||||
(unsigned long)recvreq, status, \
|
||||
recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); \
|
||||
if (0 == recvreq->req_events) { \
|
||||
mca_pml_bfo_recv_request_rndvrestartack(recvreq, MCA_PML_BFO_HDR_TYPE_RGET, \
|
||||
status, btl); \
|
||||
} \
|
||||
} \
|
||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
||||
return; \
|
||||
}
|
||||
|
||||
/**
|
||||
* Macros for pml_bfo_sendreq.c file.
|
||||
*/
|
||||
|
||||
/* This macro is called on the sending side after receiving
|
||||
* a PUT message. There is a chance that this PUT message
|
||||
* has shown up and is attempting to modify the state of
|
||||
* the req_state, but the req_state is no longer being tracked
|
||||
* because the RNDV message has turned into a RGET message
|
||||
* because it got an error on the RNDV completion.
|
||||
*/
|
||||
#define MCA_PML_BFO_VERIFY_SENDREQ_REQ_STATE_VALUE(sendreq) \
|
||||
if (sendreq->req_state == -1) { \
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, 1); \
|
||||
}
|
||||
|
||||
/* Now check the error state. This request can be in error if the
|
||||
* RNDV message made it over, but the receiver got an error trying to
|
||||
* send the ACK back and therefore sent a RECVERRNOTIFY message. In
|
||||
* that case, we want to start the restart dance as the receiver has
|
||||
* matched this message already. Only restart if there are no
|
||||
* outstanding events on send request. */
|
||||
#define MCA_PML_BFO_RNDV_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \
|
||||
if( OPAL_UNLIKELY ((sendreq)->req_error)) { \
|
||||
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \
|
||||
btl, type, description); \
|
||||
return; \
|
||||
}
|
||||
|
||||
/**
|
||||
* This macro is called within the frag completion function in two
|
||||
* places. It is called to see if any errors occur prior to the
|
||||
* completion event on the frag. It is then called a second time
|
||||
* after the scheduling routine is called as the scheduling routine
|
||||
* may have detected that a BTL that was cached on the request had
|
||||
* been removed and therefore marked the request in error. In that
|
||||
* case, the scheduling of fragments can no longer proceed properly,
|
||||
* and if there are no outstanding events, iniated the restart dance.
|
||||
*/
|
||||
#define MCA_PML_BFO_FRAG_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl, type, description) \
|
||||
if( OPAL_UNLIKELY((sendreq)->req_error)) { \
|
||||
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, \
|
||||
btl, type, description); \
|
||||
return; \
|
||||
}
|
||||
|
||||
/* This can happen if a FIN message arrives after the request was
|
||||
* marked in error. So, just drop the message. Note that the status
|
||||
* field is not checked here. That is because that is the value
|
||||
* returned in the FIN hdr.hdr_fail field and may be used for other
|
||||
* things. */
|
||||
#define MCA_PML_BFO_RGET_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, btl, des) \
|
||||
if( OPAL_UNLIKELY(sendreq->req_error)) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"FIN: received on broken request, skipping, " \
|
||||
"PML=%d, src_req=%lx, dst_req=%lx, peer=%d", \
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence, \
|
||||
(unsigned long)sendreq, (unsigned long)sendreq->req_recv.pval, \
|
||||
sendreq->req_send.req_base.req_peer); \
|
||||
btl->btl_free(btl, des); \
|
||||
return; \
|
||||
}
|
||||
|
||||
|
||||
/* Check if there has been an error on the send request when we get
|
||||
* a completion event on the RDMA write. */
|
||||
#define MCA_PML_BFO_PUT_COMPLETION_SENDREQ_ERROR_CHECK(sendreq, status, btl) \
|
||||
if ( OPAL_UNLIKELY(sendreq->req_error)) { \
|
||||
mca_pml_bfo_completion_sendreq_has_error(sendreq, status, btl, \
|
||||
MCA_PML_BFO_HDR_TYPE_PUT, "RDMA write"); \
|
||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
||||
return; \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_CHECK_FOR_RNDV_RESTART(hdr, sendreq, type) \
|
||||
if (0 < sendreq->req_restartseq) { \
|
||||
mca_pml_bfo_update_rndv_fields(hdr, sendreq, type); \
|
||||
}
|
||||
|
||||
/* If a bml_btl gets mapped out, then we need to adjust it based
|
||||
* on the btl from the callback function. These macros are called on
|
||||
* every callback to make sure things are copacetic.
|
||||
*/
|
||||
#define MCA_PML_BFO_CHECK_EAGER_BML_BTL_ON_FIN_COMPLETION(bml_btl, btl, des) \
|
||||
if (bml_btl->btl != btl) { \
|
||||
ompi_proc_t *proc = (ompi_proc_t*) des->des_cbdata; \
|
||||
mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]; \
|
||||
bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); \
|
||||
}
|
||||
#define MCA_PML_BFO_CHECK_SENDREQ_EAGER_BML_BTL(bml_btl, btl, sendreq, type) \
|
||||
if (bml_btl->btl != btl) { \
|
||||
mca_pml_bfo_find_sendreq_eager_bml_btl(&bml_btl, btl, sendreq, type); \
|
||||
}
|
||||
#define MCA_PML_BFO_CHECK_SENDREQ_RDMA_BML_BTL(bml_btl, btl, sendreq, type) \
|
||||
if (bml_btl->btl != btl) { \
|
||||
mca_pml_bfo_find_sendreq_rdma_bml_btl(&bml_btl, btl, sendreq, type); \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL(bml_btl, btl, recvreq, type) \
|
||||
if (bml_btl->btl != btl) { \
|
||||
mca_pml_bfo_find_recvreq_eager_bml_btl(&bml_btl, btl, recvreq, type); \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_CHECK_RECVREQ_RDMA_BML_BTL(bml_btl, btl, recvreq, type) \
|
||||
if (bml_btl->btl != btl) { \
|
||||
mca_pml_bfo_find_recvreq_rdma_bml_btl(&bml_btl, btl, recvreq, type); \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_CHECK_RECVREQ_EAGER_BML_BTL_RECV_CTL(bml_btl, btl, des) \
|
||||
if (bml_btl->btl != btl) { \
|
||||
mca_pml_bfo_update_eager_bml_btl_recv_ctl(&bml_btl, btl, des); \
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_CHECK_FOR_REMOVED_BML(sendreq, frag, btl) \
|
||||
if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { \
|
||||
opal_output_verbose(30, mca_pml_bfo_output, \
|
||||
"PUT received: no matching BTL to RDMA write to, oustanding " \
|
||||
"events=%d, PML=%d, RQS=%d, src_req=%p, dst_req=%p, peer=%d", \
|
||||
sendreq->req_events, \
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence, \
|
||||
sendreq->req_restartseq, (void *)sendreq, \
|
||||
sendreq->req_recv.pval, sendreq->req_send.req_base.req_peer); \
|
||||
MCA_PML_BFO_RDMA_FRAG_RETURN(frag); \
|
||||
sendreq->req_error++; \
|
||||
if (0 == sendreq->req_events) { \
|
||||
mca_pml_bfo_send_request_rndvrestartnotify(sendreq, false, \
|
||||
MCA_PML_BFO_HDR_TYPE_PUT, \
|
||||
OMPI_ERROR, btl); \
|
||||
} \
|
||||
return; \
|
||||
}
|
||||
|
||||
/* This macro checks to see if the cached number of BTLs in the
|
||||
* send request still matches the value from the endpoint.
|
||||
* If it does not, this means that a BTL was removed from the
|
||||
* available list. In this case, start the request over.
|
||||
*/
|
||||
#define MCA_PML_BFO_CHECK_FOR_REMOVED_BTL(sendreq, range) \
|
||||
if ((int)mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->btl_send) \
|
||||
!= range->range_btl_cnt) { \
|
||||
sendreq->req_error++; \
|
||||
return OMPI_ERROR; \
|
||||
}
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,539 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_PML_BFO_HEADER_H
|
||||
#define MCA_PML_BFO_HEADER_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
|
||||
#include "opal/types.h"
|
||||
#include "opal/util/arch.h"
|
||||
#include "opal/mca/btl/btl.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
#define MCA_PML_BFO_HDR_TYPE_MATCH (MCA_BTL_TAG_PML + 1)
|
||||
#define MCA_PML_BFO_HDR_TYPE_RNDV (MCA_BTL_TAG_PML + 2)
|
||||
#define MCA_PML_BFO_HDR_TYPE_RGET (MCA_BTL_TAG_PML + 3)
|
||||
#define MCA_PML_BFO_HDR_TYPE_ACK (MCA_BTL_TAG_PML + 4)
|
||||
#define MCA_PML_BFO_HDR_TYPE_NACK (MCA_BTL_TAG_PML + 5)
|
||||
#define MCA_PML_BFO_HDR_TYPE_FRAG (MCA_BTL_TAG_PML + 6)
|
||||
#define MCA_PML_BFO_HDR_TYPE_GET (MCA_BTL_TAG_PML + 7)
|
||||
#define MCA_PML_BFO_HDR_TYPE_PUT (MCA_BTL_TAG_PML + 8)
|
||||
#define MCA_PML_BFO_HDR_TYPE_FIN (MCA_BTL_TAG_PML + 9)
|
||||
#if PML_BFO
|
||||
#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNOTIFY (MCA_BTL_TAG_PML + 10)
|
||||
#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTACK (MCA_BTL_TAG_PML + 11)
|
||||
#define MCA_PML_BFO_HDR_TYPE_RNDVRESTARTNACK (MCA_BTL_TAG_PML + 12)
|
||||
#define MCA_PML_BFO_HDR_TYPE_RECVERRNOTIFY (MCA_BTL_TAG_PML + 13)
|
||||
#endif /* PML_BFO */
|
||||
|
||||
#define MCA_PML_BFO_HDR_FLAGS_ACK 1 /* is an ack required */
|
||||
#define MCA_PML_BFO_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
|
||||
#define MCA_PML_BFO_HDR_FLAGS_PIN 4 /* is user buffer pinned */
|
||||
#define MCA_PML_BFO_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */
|
||||
#define MCA_PML_BFO_HDR_FLAGS_NORDMA 16 /* rest will be send by copy-in-out */
|
||||
#if PML_BFO
|
||||
#define MCA_PML_BFO_HDR_FLAGS_RESTART 32 /* restart RNDV because of error */
|
||||
#endif /* PML_BFO */
|
||||
|
||||
/**
|
||||
* Common hdr attributes - must be first element in each hdr type
|
||||
*/
|
||||
struct mca_pml_bfo_common_hdr_t {
|
||||
uint8_t hdr_type; /**< type of envelope */
|
||||
uint8_t hdr_flags; /**< flags indicating how fragment should be processed */
|
||||
};
|
||||
typedef struct mca_pml_bfo_common_hdr_t mca_pml_bfo_common_hdr_t;
|
||||
|
||||
#define MCA_PML_BFO_COMMON_HDR_NTOH(h)
|
||||
#define MCA_PML_BFO_COMMON_HDR_HTON(h)
|
||||
|
||||
/**
|
||||
* Header definition for the first fragment, contains the
|
||||
* attributes required to match the corresponding posted receive.
|
||||
*/
|
||||
struct mca_pml_bfo_match_hdr_t {
|
||||
mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint16_t hdr_ctx; /**< communicator index */
|
||||
int32_t hdr_src; /**< source rank */
|
||||
int32_t hdr_tag; /**< user tag */
|
||||
uint16_t hdr_seq; /**< message sequence number */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2]; /**< explicitly pad to 16 bytes. Compilers seem to already prefer to do this, but make it explicit just in case */
|
||||
#endif
|
||||
};
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
#define OMPI_PML_BFO_MATCH_HDR_LEN 16
|
||||
#else
|
||||
#define OMPI_PML_BFO_MATCH_HDR_LEN 14
|
||||
#endif
|
||||
|
||||
typedef struct mca_pml_bfo_match_hdr_t mca_pml_bfo_match_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_BFO_MATCH_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_BFO_MATCH_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
#define MCA_PML_BFO_MATCH_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_ctx = ntohs((h).hdr_ctx); \
|
||||
(h).hdr_src = ntohl((h).hdr_src); \
|
||||
(h).hdr_tag = ntohl((h).hdr_tag); \
|
||||
(h).hdr_seq = ntohs((h).hdr_seq); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_MATCH_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_BFO_MATCH_HDR_FILL(h); \
|
||||
(h).hdr_ctx = htons((h).hdr_ctx); \
|
||||
(h).hdr_src = htonl((h).hdr_src); \
|
||||
(h).hdr_tag = htonl((h).hdr_tag); \
|
||||
(h).hdr_seq = htons((h).hdr_seq); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header definition for the first fragment when an acknowledgment
|
||||
* is required. This could be the first fragment of a large message
|
||||
* or a short message that requires an ack (synchronous).
|
||||
*/
|
||||
struct mca_pml_bfo_rendezvous_hdr_t {
|
||||
mca_pml_bfo_match_hdr_t hdr_match;
|
||||
uint64_t hdr_msg_length; /**< message length */
|
||||
opal_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */
|
||||
#if PML_BFO
|
||||
opal_ptr_t hdr_dst_req; /**< pointer to dst req */
|
||||
uint8_t hdr_restartseq; /**< restart sequence */
|
||||
#endif /* PML_BFO */
|
||||
};
|
||||
typedef struct mca_pml_bfo_rendezvous_hdr_t mca_pml_bfo_rendezvous_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_BFO_RNDV_HDR_FILL(h) \
|
||||
MCA_PML_BFO_MATCH_HDR_FILL((h).hdr_match)
|
||||
#else
|
||||
#define MCA_PML_BFO_RNDV_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
/* Note that hdr_src_req is not put in network byte order because it
|
||||
is never processed by the receiver, other than being copied into
|
||||
the ack header */
|
||||
#define MCA_PML_BFO_RNDV_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \
|
||||
(h).hdr_msg_length = ntoh64((h).hdr_msg_length); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_RNDV_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \
|
||||
MCA_PML_BFO_RNDV_HDR_FILL(h); \
|
||||
(h).hdr_msg_length = hton64((h).hdr_msg_length); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header definition for a combined rdma rendezvous/get
|
||||
*/
|
||||
struct mca_pml_bfo_rget_hdr_t {
|
||||
mca_pml_bfo_rendezvous_hdr_t hdr_rndv;
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[4];
|
||||
#endif
|
||||
opal_ptr_t hdr_des; /**< source descriptor */
|
||||
};
|
||||
typedef struct mca_pml_bfo_rget_hdr_t mca_pml_bfo_rget_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_BFO_RGET_HDR_FILL(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_RNDV_HDR_FILL((h).hdr_rndv); \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_BFO_RGET_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
#define MCA_PML_BFO_RGET_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_RNDV_HDR_NTOH((h).hdr_rndv); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_RGET_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_RNDV_HDR_HTON((h).hdr_rndv); \
|
||||
MCA_PML_BFO_RGET_HDR_FILL(h); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header for subsequent fragments.
|
||||
*/
|
||||
struct mca_pml_bfo_frag_hdr_t {
|
||||
mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[6];
|
||||
#endif
|
||||
uint64_t hdr_frag_offset; /**< offset into message */
|
||||
opal_ptr_t hdr_src_req; /**< pointer to source request */
|
||||
opal_ptr_t hdr_dst_req; /**< pointer to matched receive */
|
||||
};
|
||||
typedef struct mca_pml_bfo_frag_hdr_t mca_pml_bfo_frag_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_BFO_FRAG_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
(h).hdr_padding[4] = 0; \
|
||||
(h).hdr_padding[5] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_BFO_FRAG_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
#define MCA_PML_BFO_FRAG_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_FRAG_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_BFO_FRAG_HDR_FILL(h); \
|
||||
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header used to acknowledgment outstanding fragment(s).
|
||||
*/
|
||||
|
||||
struct mca_pml_bfo_ack_hdr_t {
|
||||
mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[6];
|
||||
#endif
|
||||
opal_ptr_t hdr_src_req; /**< source request */
|
||||
opal_ptr_t hdr_dst_req; /**< matched receive request */
|
||||
uint64_t hdr_send_offset; /**< starting point of copy in/out */
|
||||
};
|
||||
typedef struct mca_pml_bfo_ack_hdr_t mca_pml_bfo_ack_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_BFO_ACK_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
(h).hdr_padding[4] = 0; \
|
||||
(h).hdr_padding[5] = 0; \
|
||||
} while (0)
|
||||
#else
|
||||
#define MCA_PML_BFO_ACK_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
/* Note that the request headers are not put in NBO because the
|
||||
src_req is already in receiver's byte order and the dst_req is not
|
||||
used by the receiver for anything other than backpointers in return
|
||||
headers */
|
||||
#define MCA_PML_BFO_ACK_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_send_offset = ntoh64((h).hdr_send_offset); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_ACK_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_BFO_ACK_HDR_FILL(h); \
|
||||
(h).hdr_send_offset = hton64((h).hdr_send_offset); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header used to initiate an RDMA operation.
|
||||
*/
|
||||
|
||||
struct mca_pml_bfo_rdma_hdr_t {
|
||||
mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */
|
||||
#endif
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
opal_ptr_t hdr_req; /**< destination request */
|
||||
#if PML_BFO
|
||||
opal_ptr_t hdr_dst_req; /**< pointer to destination request */
|
||||
#endif /* PML_BFO */
|
||||
opal_ptr_t hdr_des; /**< source descriptor */
|
||||
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
||||
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
||||
};
|
||||
typedef struct mca_pml_bfo_rdma_hdr_t mca_pml_bfo_rdma_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_BFO_RDMA_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_BFO_RDMA_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
#define MCA_PML_BFO_RDMA_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
(h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_RDMA_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_BFO_RDMA_HDR_FILL(h); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
(h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header used to complete an RDMA operation.
|
||||
*/
|
||||
|
||||
struct mca_pml_bfo_fin_hdr_t {
|
||||
mca_pml_bfo_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2];
|
||||
#endif
|
||||
#if PML_BFO
|
||||
/* Match info is needed to check for duplicate FIN messages. */
|
||||
mca_pml_bfo_match_hdr_t hdr_match;
|
||||
#endif /* PML_BFO */
|
||||
uint32_t hdr_fail; /**< RDMA operation failed */
|
||||
opal_ptr_t hdr_des; /**< completed descriptor */
|
||||
};
|
||||
typedef struct mca_pml_bfo_fin_hdr_t mca_pml_bfo_fin_hdr_t;
|
||||
|
||||
#if PML_BFO
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_BFO_FIN_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
MCA_PML_BFO_MATCH_HDR_FILL((h).hdr_match); \
|
||||
} while (0)
|
||||
#else
|
||||
#define MCA_PML_BFO_FIN_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
#define MCA_PML_BFO_FIN_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_FIN_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \
|
||||
MCA_PML_BFO_FIN_HDR_FILL(h); \
|
||||
} while (0)
|
||||
#else /* PML_BFO */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_BFO_FIN_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while (0)
|
||||
#else
|
||||
#define MCA_PML_BFO_FIN_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
#define MCA_PML_BFO_FIN_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_FIN_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_BFO_FIN_HDR_FILL(h); \
|
||||
} while (0)
|
||||
#endif /* PML_BFO */
|
||||
|
||||
#if PML_BFO
|
||||
/**
|
||||
* Header used to restart a rendezvous request.
|
||||
*/
|
||||
struct mca_pml_bfo_restart_hdr_t {
|
||||
mca_pml_bfo_match_hdr_t hdr_match; /**< needed to avoid duplicate messages */
|
||||
uint8_t hdr_restartseq; /**< restart sequence */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[3];
|
||||
#endif
|
||||
opal_ptr_t hdr_src_req; /**< source request */
|
||||
opal_ptr_t hdr_dst_req; /**< matched receive request */
|
||||
int32_t hdr_dst_rank; /**< needed to send NACK */
|
||||
uint32_t hdr_jobid; /**< needed to send NACK */
|
||||
uint32_t hdr_vpid; /**< needed to send NACK */
|
||||
};
|
||||
typedef struct mca_pml_bfo_restart_hdr_t mca_pml_bfo_restart_hdr_t;
|
||||
|
||||
/* Only need to put parts of the restart header in NBO. No need
|
||||
to do hdr_src_req and hdr_dst_req as they are only used on the
|
||||
by the process that originated them. */
|
||||
#define MCA_PML_BFO_RESTART_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_MATCH_HDR_NTOH((h).hdr_match); \
|
||||
(h).hdr_dst_rank = ntohl((h).hdr_dst_rank); \
|
||||
(h).hdr_jobid = ntohl((h).hdr_jobid); \
|
||||
(h).hdr_vpid = ntohl((h).hdr_vpid); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_BFO_RESTART_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_BFO_MATCH_HDR_HTON((h).hdr_match); \
|
||||
(h).hdr_dst_rank = htonl((h).hdr_dst_rank); \
|
||||
(h).hdr_jobid = htonl((h).hdr_jobid); \
|
||||
(h).hdr_vpid = htonl((h).hdr_vpid); \
|
||||
} while (0)
|
||||
|
||||
#endif /* PML_BFO */
|
||||
/**
|
||||
* Union of defined hdr types.
|
||||
*/
|
||||
union mca_pml_bfo_hdr_t {
|
||||
mca_pml_bfo_common_hdr_t hdr_common;
|
||||
mca_pml_bfo_match_hdr_t hdr_match;
|
||||
mca_pml_bfo_rendezvous_hdr_t hdr_rndv;
|
||||
mca_pml_bfo_rget_hdr_t hdr_rget;
|
||||
mca_pml_bfo_frag_hdr_t hdr_frag;
|
||||
mca_pml_bfo_ack_hdr_t hdr_ack;
|
||||
mca_pml_bfo_rdma_hdr_t hdr_rdma;
|
||||
mca_pml_bfo_fin_hdr_t hdr_fin;
|
||||
#if PML_BFO
|
||||
mca_pml_bfo_restart_hdr_t hdr_restart;
|
||||
#endif /* PML_BFO */
|
||||
};
|
||||
typedef union mca_pml_bfo_hdr_t mca_pml_bfo_hdr_t;
|
||||
|
||||
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
static inline __opal_attribute_always_inline__ void
|
||||
bfo_hdr_ntoh(mca_pml_bfo_hdr_t *hdr, const uint8_t hdr_type)
|
||||
{
|
||||
if(!(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NBO))
|
||||
return;
|
||||
|
||||
switch(hdr_type) {
|
||||
case MCA_PML_BFO_HDR_TYPE_MATCH:
|
||||
MCA_PML_BFO_MATCH_HDR_NTOH(hdr->hdr_match);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_RNDV:
|
||||
MCA_PML_BFO_RNDV_HDR_NTOH(hdr->hdr_rndv);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_RGET:
|
||||
MCA_PML_BFO_RGET_HDR_NTOH(hdr->hdr_rget);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_ACK:
|
||||
MCA_PML_BFO_ACK_HDR_NTOH(hdr->hdr_ack);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_FRAG:
|
||||
MCA_PML_BFO_FRAG_HDR_NTOH(hdr->hdr_frag);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_PUT:
|
||||
MCA_PML_BFO_RDMA_HDR_NTOH(hdr->hdr_rdma);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_FIN:
|
||||
MCA_PML_BFO_FIN_HDR_NTOH(hdr->hdr_fin);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
#define bfo_hdr_ntoh(h, t) do{}while(0)
|
||||
#endif
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
#define bfo_hdr_hton(h, t, p) \
|
||||
bfo_hdr_hton_intr((mca_pml_bfo_hdr_t*)h, t, p)
|
||||
static inline __opal_attribute_always_inline__ void
|
||||
bfo_hdr_hton_intr(mca_pml_bfo_hdr_t *hdr, const uint8_t hdr_type,
|
||||
const ompi_proc_t *proc)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
hdr->hdr_common.hdr_flags |= MCA_PML_BFO_HDR_FLAGS_NBO;
|
||||
#else
|
||||
|
||||
if(!(proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN))
|
||||
return;
|
||||
|
||||
hdr->hdr_common.hdr_flags |= MCA_PML_BFO_HDR_FLAGS_NBO;
|
||||
switch(hdr_type) {
|
||||
case MCA_PML_BFO_HDR_TYPE_MATCH:
|
||||
MCA_PML_BFO_MATCH_HDR_HTON(hdr->hdr_match);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_RNDV:
|
||||
MCA_PML_BFO_RNDV_HDR_HTON(hdr->hdr_rndv);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_RGET:
|
||||
MCA_PML_BFO_RGET_HDR_HTON(hdr->hdr_rget);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_ACK:
|
||||
MCA_PML_BFO_ACK_HDR_HTON(hdr->hdr_ack);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_FRAG:
|
||||
MCA_PML_BFO_FRAG_HDR_HTON(hdr->hdr_frag);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_PUT:
|
||||
MCA_PML_BFO_RDMA_HDR_HTON(hdr->hdr_rdma);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_FIN:
|
||||
MCA_PML_BFO_FIN_HDR_HTON(hdr->hdr_fin);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#define bfo_hdr_hton(h, t, p) do{}while(0)
|
||||
#endif
|
||||
#endif
|
@ -1,171 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/message/message.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
|
||||
|
||||
int mca_pml_bfo_iprobe(int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
int *matched, ompi_status_public_t * status)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
mca_pml_bfo_recv_request_t recvreq;
|
||||
|
||||
OBJ_CONSTRUCT( &recvreq, mca_pml_bfo_recv_request_t );
|
||||
recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML;
|
||||
recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE;
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, false);
|
||||
MCA_PML_BFO_RECV_REQUEST_START(&recvreq);
|
||||
|
||||
if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) {
|
||||
if( NULL != status ) {
|
||||
*status = recvreq.req_recv.req_base.req_ompi.req_status;
|
||||
}
|
||||
rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
*matched = 1;
|
||||
} else {
|
||||
*matched = 0;
|
||||
opal_progress();
|
||||
}
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
int mca_pml_bfo_probe(int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
ompi_status_public_t * status)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
mca_pml_bfo_recv_request_t recvreq;
|
||||
|
||||
OBJ_CONSTRUCT( &recvreq, mca_pml_bfo_recv_request_t );
|
||||
recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML;
|
||||
recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE;
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, false);
|
||||
MCA_PML_BFO_RECV_REQUEST_START(&recvreq);
|
||||
|
||||
ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi);
|
||||
rc = recvreq.req_recv.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
if (NULL != status) {
|
||||
*status = recvreq.req_recv.req_base.req_ompi.req_status;
|
||||
}
|
||||
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pml_bfo_improbe(int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
int *matched,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t * status)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
mca_pml_bfo_recv_request_t *recvreq;
|
||||
|
||||
*message = ompi_message_alloc();
|
||||
if (NULL == *message) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq);
|
||||
if (NULL == recvreq)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_IMPROBE;
|
||||
|
||||
/* initialize the request enough to probe and get the status */
|
||||
MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, NULL, 0, &ompi_mpi_char.dt,
|
||||
src, tag, comm, false);
|
||||
MCA_PML_BFO_RECV_REQUEST_START(recvreq);
|
||||
|
||||
if( recvreq->req_recv.req_base.req_ompi.req_complete == true ) {
|
||||
if( NULL != status ) {
|
||||
*status = recvreq->req_recv.req_base.req_ompi.req_status;
|
||||
}
|
||||
*matched = 1;
|
||||
|
||||
(*message)->comm = comm;
|
||||
(*message)->req_ptr = recvreq;
|
||||
(*message)->peer = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
|
||||
(*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount;
|
||||
|
||||
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
} else {
|
||||
*matched = 0;
|
||||
|
||||
/* we only free if we didn't match, because we're going to
|
||||
translate the request into a receive request later on if it
|
||||
was matched */
|
||||
MCA_PML_BFO_RECV_REQUEST_RETURN( recvreq );
|
||||
ompi_message_return(*message);
|
||||
*message = MPI_MESSAGE_NULL;
|
||||
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pml_bfo_mprobe(int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t * status)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
mca_pml_bfo_recv_request_t *recvreq;
|
||||
|
||||
*message = ompi_message_alloc();
|
||||
if (NULL == *message) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq);
|
||||
if (NULL == recvreq)
|
||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_MPROBE;
|
||||
|
||||
/* initialize the request enough to probe and get the status */
|
||||
MCA_PML_BFO_RECV_REQUEST_INIT(recvreq, NULL, 0, &ompi_mpi_char.dt,
|
||||
src, tag, comm, false);
|
||||
MCA_PML_BFO_RECV_REQUEST_START(recvreq);
|
||||
|
||||
ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi);
|
||||
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
|
||||
if( NULL != status ) {
|
||||
*status = recvreq->req_recv.req_base.req_ompi.req_status;
|
||||
}
|
||||
|
||||
(*message)->comm = comm;
|
||||
(*message)->req_ptr = recvreq;
|
||||
(*message)->peer = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
|
||||
(*message)->count = recvreq->req_recv.req_base.req_ompi.req_status._ucount;
|
||||
|
||||
return rc;
|
||||
}
|
@ -1,308 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "pml_bfo_recvfrag.h"
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
#include "ompi/message/message.h"
|
||||
|
||||
int mca_pml_bfo_irecv_init(void *addr,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
struct ompi_request_t **request)
|
||||
{
|
||||
mca_pml_bfo_recv_request_t *recvreq;
|
||||
MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq);
|
||||
if (NULL == recvreq)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_INIT(recvreq,
|
||||
addr,
|
||||
count, datatype, src, tag, comm, true);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&((recvreq)->req_recv.req_base),
|
||||
PERUSE_RECV);
|
||||
|
||||
*request = (ompi_request_t *) recvreq;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_pml_bfo_irecv(void *addr,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
struct ompi_request_t **request)
|
||||
{
|
||||
mca_pml_bfo_recv_request_t *recvreq;
|
||||
MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq);
|
||||
if (NULL == recvreq)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_INIT(recvreq,
|
||||
addr,
|
||||
count, datatype, src, tag, comm, false);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&((recvreq)->req_recv.req_base),
|
||||
PERUSE_RECV);
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_START(recvreq);
|
||||
*request = (ompi_request_t *) recvreq;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int mca_pml_bfo_recv(void *addr,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
ompi_status_public_t * status)
|
||||
{
|
||||
int rc;
|
||||
mca_pml_bfo_recv_request_t *recvreq;
|
||||
MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq);
|
||||
if (NULL == recvreq)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_INIT(recvreq,
|
||||
addr,
|
||||
count, datatype, src, tag, comm, false);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&((recvreq)->req_recv.req_base),
|
||||
PERUSE_RECV);
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_START(recvreq);
|
||||
ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi);
|
||||
|
||||
if (NULL != status) { /* return status */
|
||||
*status = recvreq->req_recv.req_base.req_ompi.req_status;
|
||||
}
|
||||
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
ompi_request_free( (ompi_request_t**)&recvreq );
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Non-blocking matched receive (MPI_Imrecv): convert the probe request
 * carried by 'message' (produced by mprobe/improbe) back into a real
 * receive request for the user's buffer, then drive it forward using the
 * fragment that was already matched.
 *
 * The exact statement order below matters: source/tag/sequence and the
 * matched fragment must be captured from the probe request BEFORE the
 * request is finalized and re-initialized.
 *
 * @param buf      user receive buffer
 * @param count    element count
 * @param datatype element datatype
 * @param message  [in/out] matched message; reset to MPI_MESSAGE_NULL
 * @param request  [out] the (re-used) receive request
 * @return OMPI_SUCCESS
 */
int
mca_pml_bfo_imrecv( void *buf,
                    size_t count,
                    ompi_datatype_t *datatype,
                    struct ompi_message_t **message,
                    struct ompi_request_t **request )
{
    mca_pml_bfo_recv_frag_t* frag;
    mca_pml_bfo_recv_request_t *recvreq;
    mca_pml_bfo_hdr_t *hdr;
    int src, tag;
    ompi_communicator_t *comm;
    mca_pml_bfo_comm_proc_t* proc;
    mca_pml_bfo_comm_t* bfo_comm;
    uint64_t seq;

    /* get the request from the message and the frag from the request
       before we overwrite everything */
    recvreq = (mca_pml_bfo_recv_request_t*) (*message)->req_ptr;
    frag = (mca_pml_bfo_recv_frag_t*) recvreq->req_recv.req_base.req_addr;
    src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
    tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
    comm = (*message)->comm;
    bfo_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm;
    seq = recvreq->req_recv.req_base.req_sequence;

    /* make the request a recv request again */
    /* The old request kept pointers to comm and the char datatype.
       We're about to release those, but need to make sure comm
       doesn't go out of scope (we don't care about the char datatype
       anymore).  So retain comm, then release the frag, then reinit
       the frag (which will retain comm), then release comm (but the
       frag still has it's ref, so it'll stay in scope).  Make
       sense? */
    OBJ_RETAIN(comm);
    MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv);
    recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV;
    MCA_PML_BFO_RECV_REQUEST_INIT(recvreq,
                                  buf,
                                  count, datatype,
                                  src, tag, comm, false);
    OBJ_RELEASE(comm);

    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
                             &((recvreq)->req_recv.req_base),
                             PERUSE_RECV);

    /* init/re-init the request */
    recvreq->req_lock = 0;
    recvreq->req_pipeline_depth = 0;
    recvreq->req_bytes_received = 0;
    /* What about req_rdma_cnt ? */
    /* NOTE(review): unlike mrecv below, req_rdma_cnt is not reset here;
       the original author flagged this too — verify before reuse */
    recvreq->req_rdma_idx = 0;
    recvreq->req_pending = false;
    recvreq->req_ack_sent = false;

    MCA_PML_BASE_RECV_START(&recvreq->req_recv.req_base);

    /* Note - sequence number already assigned */
    recvreq->req_recv.req_base.req_sequence = seq;

    proc = &bfo_comm->procs[recvreq->req_recv.req_base.req_peer];
    recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
    prepare_recv_req_converter(recvreq);

    /* we can't go through the match, since we already have the match.
       Cheat and do what REQUEST_START does, but without the frag
       search */
    hdr = (mca_pml_bfo_hdr_t*)frag->segments->seg_addr.pval;
    switch(hdr->hdr_common.hdr_type) {
    case MCA_PML_BFO_HDR_TYPE_MATCH:
        mca_pml_bfo_recv_request_progress_match(recvreq, frag->btl, frag->segments,
                                                frag->num_segments);
        break;
    case MCA_PML_BFO_HDR_TYPE_RNDV:
        mca_pml_bfo_recv_request_progress_rndv(recvreq, frag->btl, frag->segments,
                                               frag->num_segments);
        break;
    case MCA_PML_BFO_HDR_TYPE_RGET:
        mca_pml_bfo_recv_request_progress_rget(recvreq, frag->btl, frag->segments,
                                               frag->num_segments);
        break;
    default:
        /* matched fragment must carry one of the three header types above */
        assert(0);
    }
    MCA_PML_BFO_RECV_FRAG_RETURN(frag);

    /* the message handle is consumed by this call */
    ompi_message_return(*message);
    *message = MPI_MESSAGE_NULL;
    *request = (ompi_request_t *) recvreq;

    return OMPI_SUCCESS;
}
|
||||
|
||||
|
||||
/**
 * Blocking matched receive (MPI_Mrecv): convert the probe request
 * carried by 'message' (produced by mprobe/improbe) back into a real
 * receive request, drive it with the already-matched fragment, wait for
 * completion, and release the request.
 *
 * Statement order matters: source/tag/sequence and the matched fragment
 * are captured from the probe request BEFORE it is finalized/re-inited.
 *
 * @param buf      user receive buffer
 * @param count    element count
 * @param datatype element datatype
 * @param message  [in/out] matched message; reset to MPI_MESSAGE_NULL
 * @param status   [out] completion status (may be NULL)
 * @return the completed request's MPI error code
 */
int
mca_pml_bfo_mrecv( void *buf,
                   size_t count,
                   ompi_datatype_t *datatype,
                   struct ompi_message_t **message,
                   ompi_status_public_t* status )
{
    mca_pml_bfo_recv_frag_t* frag;
    mca_pml_bfo_recv_request_t *recvreq;
    mca_pml_bfo_hdr_t *hdr;
    int src, tag, rc;
    ompi_communicator_t *comm;
    mca_pml_bfo_comm_proc_t* proc;
    mca_pml_bfo_comm_t* bfo_comm;
    uint64_t seq;

    /* get the request from the message and the frag from the request
       before we overwrite everything */
    comm = (*message)->comm;
    recvreq = (mca_pml_bfo_recv_request_t*) (*message)->req_ptr;
    frag = (mca_pml_bfo_recv_frag_t*) recvreq->req_recv.req_base.req_addr;
    src = recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE;
    tag = recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG;
    seq = recvreq->req_recv.req_base.req_sequence;
    bfo_comm = recvreq->req_recv.req_base.req_comm->c_pml_comm;

    /* make the request a recv request again */
    /* The old request kept pointers to comm and the char datatype.
       We're about to release those, but need to make sure comm
       doesn't go out of scope (we don't care about the char datatype
       anymore).  So retain comm, then release the frag, then reinit
       the frag (which will retain comm), then release comm (but the
       frag still has it's ref, so it'll stay in scope).  Make
       sense? */
    OBJ_RETAIN(comm);
    MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv);
    recvreq->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV;
    MCA_PML_BFO_RECV_REQUEST_INIT(recvreq,
                                  buf,
                                  count, datatype,
                                  src, tag, comm, false);
    OBJ_RELEASE(comm);

    PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
                             &((recvreq)->req_recv.req_base),
                             PERUSE_RECV);

    /* init/re-init the request */
    recvreq->req_lock = 0;
    recvreq->req_pipeline_depth = 0;
    recvreq->req_bytes_received = 0;
    recvreq->req_rdma_cnt = 0;
    recvreq->req_rdma_idx = 0;
    recvreq->req_pending = false;

    MCA_PML_BASE_RECV_START(&recvreq->req_recv.req_base);

    /* Note - sequence number already assigned */
    recvreq->req_recv.req_base.req_sequence = seq;

    proc = &bfo_comm->procs[recvreq->req_recv.req_base.req_peer];
    recvreq->req_recv.req_base.req_proc = proc->ompi_proc;
    prepare_recv_req_converter(recvreq);

    /* we can't go through the match, since we already have the match.
       Cheat and do what REQUEST_START does, but without the frag
       search */
    hdr = (mca_pml_bfo_hdr_t*)frag->segments->seg_addr.pval;
    switch(hdr->hdr_common.hdr_type) {
    case MCA_PML_BFO_HDR_TYPE_MATCH:
        mca_pml_bfo_recv_request_progress_match(recvreq, frag->btl, frag->segments,
                                                frag->num_segments);
        break;
    case MCA_PML_BFO_HDR_TYPE_RNDV:
        mca_pml_bfo_recv_request_progress_rndv(recvreq, frag->btl, frag->segments,
                                               frag->num_segments);
        break;
    case MCA_PML_BFO_HDR_TYPE_RGET:
        mca_pml_bfo_recv_request_progress_rget(recvreq, frag->btl, frag->segments,
                                               frag->num_segments);
        break;
    default:
        /* matched fragment must carry one of the three header types above */
        assert(0);
    }

    /* the message handle is consumed by this call */
    ompi_message_return(*message);
    *message = MPI_MESSAGE_NULL;
    /* block until the receive finishes */
    ompi_request_wait_completion(&(recvreq->req_recv.req_base.req_ompi));

    MCA_PML_BFO_RECV_FRAG_RETURN(frag);

    if (NULL != status) {  /* return status */
        *status = recvreq->req_recv.req_base.req_ompi.req_status;
    }
    rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
    ompi_request_free( (ompi_request_t**)&recvreq );
    return rc;
}
|
||||
|
@ -1,129 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
|
||||
int mca_pml_bfo_isend_init(void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t sendmode,
|
||||
ompi_communicator_t * comm,
|
||||
ompi_request_t ** request)
|
||||
{
|
||||
mca_pml_bfo_send_request_t *sendreq = NULL;
|
||||
MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq);
|
||||
if (NULL == sendreq)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_BFO_SEND_REQUEST_INIT(sendreq,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
dst, tag,
|
||||
comm, sendmode, true);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&(sendreq)->req_send.req_base,
|
||||
PERUSE_SEND);
|
||||
|
||||
*request = (ompi_request_t *) sendreq;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int mca_pml_bfo_isend(void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t sendmode,
|
||||
ompi_communicator_t * comm,
|
||||
ompi_request_t ** request)
|
||||
{
|
||||
int rc;
|
||||
mca_pml_bfo_send_request_t *sendreq = NULL;
|
||||
|
||||
MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq);
|
||||
if (NULL == sendreq)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_BFO_SEND_REQUEST_INIT(sendreq,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
dst, tag,
|
||||
comm, sendmode, false);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&(sendreq)->req_send.req_base,
|
||||
PERUSE_SEND);
|
||||
|
||||
MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc);
|
||||
*request = (ompi_request_t *) sendreq;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
int mca_pml_bfo_send(void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t sendmode,
|
||||
ompi_communicator_t * comm)
|
||||
{
|
||||
int rc;
|
||||
mca_pml_bfo_send_request_t *sendreq;
|
||||
|
||||
MCA_PML_BFO_SEND_REQUEST_ALLOC(comm, dst, sendreq);
|
||||
if (NULL == sendreq)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
MCA_PML_BFO_SEND_REQUEST_INIT(sendreq,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
dst, tag,
|
||||
comm, sendmode, false);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&(sendreq)->req_send.req_base,
|
||||
PERUSE_SEND);
|
||||
|
||||
MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
MCA_PML_BFO_SEND_REQUEST_RETURN( sendreq );
|
||||
return rc;
|
||||
}
|
||||
|
||||
ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi);
|
||||
|
||||
rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
ompi_request_free( (ompi_request_t**)&sendreq );
|
||||
return rc;
|
||||
}
|
@ -1,78 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
|
||||
int mca_pml_bfo_progress(void)
|
||||
{
|
||||
int i, queue_length = opal_list_get_size(&mca_pml_bfo.send_pending);
|
||||
int j, completed_requests = 0;
|
||||
bool send_succedded;
|
||||
|
||||
if( OPAL_LIKELY(0 == queue_length) )
|
||||
return 0;
|
||||
|
||||
for( i = 0; i < queue_length; i++ ) {
|
||||
mca_pml_bfo_send_pending_t pending_type = MCA_PML_BFO_SEND_PENDING_NONE;
|
||||
mca_pml_bfo_send_request_t* sendreq;
|
||||
mca_bml_base_endpoint_t* endpoint;
|
||||
|
||||
sendreq = get_request_from_send_pending(&pending_type);
|
||||
if(OPAL_UNLIKELY(NULL == sendreq))
|
||||
break;
|
||||
|
||||
switch(pending_type) {
|
||||
case MCA_PML_BFO_SEND_PENDING_NONE:
|
||||
assert(0);
|
||||
return 0;
|
||||
case MCA_PML_BFO_SEND_PENDING_SCHEDULE:
|
||||
if( mca_pml_bfo_send_request_schedule_exclusive(sendreq) ==
|
||||
OMPI_ERR_OUT_OF_RESOURCE ) {
|
||||
return 0;
|
||||
}
|
||||
completed_requests++;
|
||||
break;
|
||||
case MCA_PML_BFO_SEND_PENDING_START:
|
||||
endpoint = sendreq->req_endpoint;
|
||||
send_succedded = false;
|
||||
for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) {
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
int rc;
|
||||
|
||||
/* select a btl */
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl);
|
||||
if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) {
|
||||
send_succedded = true;
|
||||
completed_requests++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( false == send_succedded ) {
|
||||
add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
return completed_requests;
|
||||
}
|
||||
|
@ -1,118 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
#include "opal/mca/mpool/mpool.h"
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_rdma.h"
|
||||
|
||||
/* Use this registration if no registration needed for a BTL instead of NULL.
|
||||
* This will help other code to distinguish case when memory is not registered
|
||||
* from case when registration is not needed */
|
||||
static mca_mpool_base_registration_t pml_bfo_dummy_reg;
|
||||
|
||||
/*
|
||||
* Check to see if memory is registered or can be registered. Build a
|
||||
* set of registrations on the request.
|
||||
*/
|
||||
|
||||
/**
 * Build the list of RDMA-capable BTLs usable for [base, base+size):
 * for each RDMA BTL of the endpoint (round-robin, up to
 * max_rdma_per_request), check whether the memory is already registered
 * (or register it when leave_pinned is set) and record the BTL together
 * with its registration in rdma_btls.
 *
 * @param bml_endpoint peer endpoint whose btl_rdma array is scanned
 * @param base         start of the user buffer
 * @param size         length of the user buffer in bytes
 * @param rdma_btls    [out] array filled with usable (btl, registration)
 * @return number of entries written to rdma_btls (0 => caller should
 *         fall back to the pipeline protocol)
 */
size_t mca_pml_bfo_rdma_btls(
    mca_bml_base_endpoint_t* bml_endpoint,
    unsigned char* base,
    size_t size,
    mca_pml_bfo_com_btl_t* rdma_btls)
{
    int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
    double weight_total = 0;
    int num_btls_used = 0, n;

    /* shortcut when there are no rdma capable btls */
    if(num_btls == 0) {
        return 0;
    }

    /* check to see if memory is registered */
    for(n = 0; n < num_btls && num_btls_used < mca_pml_bfo.max_rdma_per_request;
            n++) {
        /* round-robin scan starting at btl_rdma_index */
        mca_bml_base_btl_t* bml_btl =
            mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
                    (bml_endpoint->btl_rdma_index + n) % num_btls);
        /* dummy registration marks "no registration needed" (BTL has no
         * mpool), as opposed to NULL which means "not registered" */
        mca_mpool_base_registration_t* reg = &pml_bfo_dummy_reg;
        mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;

        if( NULL != btl_mpool ) {
            if(!mca_pml_bfo.leave_pinned) {
                /* look through existing registrations */
                btl_mpool->mpool_find(btl_mpool, base, size, &reg);
            } else {
                /* register the memory */
                btl_mpool->mpool_register(btl_mpool, base, size, 0, &reg);
            }

            /* this BTL cannot reach the memory; try the next one */
            if(NULL == reg)
                continue;
        }

        rdma_btls[num_btls_used].bml_btl = bml_btl;
        rdma_btls[num_btls_used].btl_reg = reg;
        weight_total += bml_btl->btl_weight;
        num_btls_used++;
    }

    /* if we don't use leave_pinned and all BTLs that already have this memory
     * registered amount to less then half of available bandwidth - fall back to
     * pipeline protocol */
    if(0 == num_btls_used || (!mca_pml_bfo.leave_pinned && weight_total < 0.5))
        return 0;

    /* split 'size' across the selected BTLs proportionally to bandwidth */
    mca_pml_bfo_calc_weighted_length(rdma_btls, num_btls_used, size,
                                     weight_total);

    /* advance the round-robin start point for the next request */
    bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % num_btls;
    return num_btls_used;
}
|
||||
|
||||
size_t mca_pml_bfo_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
|
||||
size_t size,
|
||||
mca_pml_bfo_com_btl_t* rdma_btls )
|
||||
{
|
||||
int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
|
||||
double weight_total = 0;
|
||||
|
||||
for(i = 0; i < num_btls && i < mca_pml_bfo.max_rdma_per_request; i++) {
|
||||
rdma_btls[i].bml_btl =
|
||||
mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
|
||||
if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool)
|
||||
rdma_btls[i].btl_reg = NULL;
|
||||
else
|
||||
rdma_btls[i].btl_reg = &pml_bfo_dummy_reg;
|
||||
|
||||
weight_total += rdma_btls[i].bml_btl->btl_weight;
|
||||
}
|
||||
|
||||
mca_pml_bfo_calc_weighted_length(rdma_btls, i, size, weight_total);
|
||||
|
||||
return i;
|
||||
}
|
@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_BFO_RDMA_H
|
||||
#define MCA_PML_BFO_RDMA_H
|
||||
|
||||
struct mca_bml_base_endpoint_t;
|
||||
|
||||
/*
|
||||
* Of the set of available btls that support RDMA,
|
||||
* find those that already have registrations - or
|
||||
* register if required (for leave_pinned option)
|
||||
*/
|
||||
size_t mca_pml_bfo_rdma_btls(struct mca_bml_base_endpoint_t* endpoint,
|
||||
unsigned char* base, size_t size, struct mca_pml_bfo_com_btl_t* btls);
|
||||
|
||||
/* Choose RDMA BTLs to use for sending of a request by pipeline protocol.
|
||||
* Calculate number of bytes to send through each BTL according to available
|
||||
* bandwidth */
|
||||
size_t mca_pml_bfo_rdma_pipeline_btls(struct mca_bml_base_endpoint_t* endpoint,
|
||||
size_t size, mca_pml_bfo_com_btl_t* rdma_btls);
|
||||
#endif
|
||||
|
@ -1,30 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_rdmafrag.h"
|
||||
|
||||
|
||||
/* Class instance for RDMA fragments: no constructor/destructor needed,
 * fragments are recycled through the free list.
 * NOTE(review): the parent class is given here as ompi_free_list_item_t
 * while the struct declares its 'super' as opal_free_list_item_t
 * (pml_bfo_rdmafrag.h) — verify the two are layout-compatible aliases. */
OBJ_CLASS_INSTANCE(
    mca_pml_bfo_rdma_frag_t,
    ompi_free_list_item_t,
    NULL,
    NULL);
|
@ -1,75 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_BFO_RDMAFRAG_H
|
||||
#define MCA_PML_BFO_RDMAFRAG_H
|
||||
|
||||
#include "pml_bfo_hdr.h"
|
||||
#include "opal/mca/mpool/base/base.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* Direction of an RDMA operation carried by a fragment. */
typedef enum {
    MCA_PML_BFO_RDMA_PUT,
    MCA_PML_BFO_RDMA_GET
} mca_pml_bfo_rdma_state_t;

/**
 * Descriptor for one in-flight RDMA operation. Fragments are pooled in
 * mca_pml_bfo.rdma_frags and recycled via the ALLOC/RETURN macros below.
 */
struct mca_pml_bfo_rdma_frag_t {
    opal_free_list_item_t super;           /* free-list linkage (must be first) */
    mca_bml_base_btl_t* rdma_bml;          /* BML/BTL pair used for the transfer */
#if PML_BFO
    mca_btl_base_module_t* rdma_btl;       /* failover: raw BTL the op was issued on */
#endif /* PML_BFO */
    mca_pml_bfo_hdr_t rdma_hdr;            /* protocol header for this operation */
    mca_pml_bfo_rdma_state_t rdma_state;   /* PUT or GET */
    size_t rdma_length;                    /* bytes to transfer */
    /* raw storage for the remote segment descriptors */
    uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS];
    void *rdma_req;                        /* owning send/recv request */
    struct mca_bml_base_endpoint_t* rdma_ep; /* peer endpoint */
    opal_convertor_t convertor;            /* datatype conversion state */
    struct mca_mpool_base_registration_t* reg; /* memory registration, if any */
    uint32_t retries;                      /* retry count for failed operations */
};
typedef struct mca_pml_bfo_rdma_frag_t mca_pml_bfo_rdma_frag_t;

OBJ_CLASS_DECLARATION(mca_pml_bfo_rdma_frag_t);


/* Take a fragment from the global pool (blocks until one is available). */
#define MCA_PML_BFO_RDMA_FRAG_ALLOC(frag)                        \
do {                                                             \
    opal_free_list_item_t* item;                                 \
    OPAL_FREE_LIST_WAIT_MT(&mca_pml_bfo.rdma_frags, item);       \
    frag = (mca_pml_bfo_rdma_frag_t*)item;                       \
} while(0)

/* Give a fragment back to the global pool. */
#define MCA_PML_BFO_RDMA_FRAG_RETURN(frag)                       \
do {                                                             \
    /* return fragment */                                        \
    OPAL_FREE_LIST_RETURN_MT(&mca_pml_bfo.rdma_frags,            \
        (opal_free_list_item_t*)frag);                           \
} while(0)
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -1,743 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/prefetch.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
#include "ompi/memchecker.h"
|
||||
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_comm.h"
|
||||
#include "pml_bfo_recvfrag.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "pml_bfo_hdr.h"
|
||||
#if PML_BFO
|
||||
#include "pml_bfo_failover.h"
|
||||
#endif /* PML_BFO */
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_pml_bfo_buffer_t,
|
||||
ompi_free_list_item_t,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_pml_bfo_recv_frag_t,
|
||||
opal_list_item_t,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
/**
|
||||
* Static functions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Append a unexpected descriptor to a queue. This function will allocate and
|
||||
* initialize the fragment (if necessary) and then will add it to the specified
|
||||
* queue. The allocated fragment is not returned to the caller.
|
||||
*/
|
||||
static void
|
||||
append_frag_to_list(opal_list_t *queue, mca_btl_base_module_t *btl,
|
||||
mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
|
||||
size_t num_segments, mca_pml_bfo_recv_frag_t* frag)
|
||||
{
|
||||
if(NULL == frag) {
|
||||
MCA_PML_BFO_RECV_FRAG_ALLOC(frag);
|
||||
MCA_PML_BFO_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl);
|
||||
}
|
||||
opal_list_append(queue, (opal_list_item_t*)frag);
|
||||
}
|
||||
|
||||
/**
|
||||
* Match incoming recv_frags against posted receives.
|
||||
* Supports out of order delivery.
|
||||
*
|
||||
* @param frag_header (IN) Header of received recv_frag.
|
||||
* @param frag_desc (IN) Received recv_frag descriptor.
|
||||
* @param match_made (OUT) Flag indicating wether a match was made.
|
||||
* @param additional_matches (OUT) List of additional matches
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*/
|
||||
static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl,
|
||||
mca_pml_bfo_match_hdr_t *hdr,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments,
|
||||
int type);
|
||||
|
||||
static mca_pml_bfo_recv_request_t*
|
||||
match_one(mca_btl_base_module_t *btl,
|
||||
mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
|
||||
size_t num_segments, ompi_communicator_t *comm_ptr,
|
||||
mca_pml_bfo_comm_proc_t *proc,
|
||||
mca_pml_bfo_recv_frag_t* frag);
|
||||
|
||||
void mca_pml_bfo_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_bfo_match_hdr_t* hdr = (mca_pml_bfo_match_hdr_t*)segments->seg_addr.pval;
|
||||
ompi_communicator_t *comm_ptr;
|
||||
mca_pml_bfo_recv_request_t *match = NULL;
|
||||
mca_pml_bfo_comm_t *comm;
|
||||
mca_pml_bfo_comm_proc_t *proc;
|
||||
size_t num_segments = des->des_local_count;
|
||||
size_t bytes_received = 0;
|
||||
|
||||
assert(num_segments <= MCA_BTL_DES_MAX_SEGMENTS);
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_BFO_MATCH_HDR_LEN) ) {
|
||||
return;
|
||||
}
|
||||
bfo_hdr_ntoh(((mca_pml_bfo_hdr_t*) hdr), MCA_PML_BFO_HDR_TYPE_MATCH);
|
||||
|
||||
/* communicator pointer */
|
||||
comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
|
||||
if(OPAL_UNLIKELY(NULL == comm_ptr)) {
|
||||
/* This is a special case. A message for a not yet existing
|
||||
* communicator can happens. Instead of doing a matching we
|
||||
* will temporarily add it the a pending queue in the PML.
|
||||
* Later on, when the communicator is completely instantiated,
|
||||
* this pending queue will be searched and all matching fragments
|
||||
* moved to the right communicator.
|
||||
*/
|
||||
append_frag_to_list( &mca_pml_bfo.non_existing_communicator_pending,
|
||||
btl, hdr, segments, num_segments, NULL );
|
||||
return;
|
||||
}
|
||||
comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm;
|
||||
|
||||
/* source sequence number */
|
||||
proc = &comm->procs[hdr->hdr_src];
|
||||
|
||||
/* We generate the MSG_ARRIVED event as soon as the PML is aware
|
||||
* of a matching fragment arrival. Independing if it is received
|
||||
* on the correct order or not. This will allow the tools to
|
||||
* figure out if the messages are not received in the correct
|
||||
* order (if multiple network interfaces).
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* get next expected message sequence number - if threaded
|
||||
* run, lock to make sure that if another thread is processing
|
||||
* a frag from the same message a match is made only once.
|
||||
* Also, this prevents other posted receives (for a pair of
|
||||
* end points) from being processed, and potentially "loosing"
|
||||
* the fragment.
|
||||
*/
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
|
||||
/* get sequence number of next message that can be processed */
|
||||
if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) ||
|
||||
(opal_list_get_size(&proc->frags_cant_match) > 0 ))) {
|
||||
goto slow_path;
|
||||
}
|
||||
|
||||
/* This is the sequence number we were expecting, so we can try
|
||||
* matching it to already posted receives.
|
||||
*/
|
||||
|
||||
/* We're now expecting the next sequence number. */
|
||||
proc->expected_sequence++;
|
||||
|
||||
/* We generate the SEARCH_POSTED_QUEUE only when the message is
|
||||
* received in the correct sequence. Otherwise, we delay the event
|
||||
* generation until we reach the correct sequence number.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, NULL);
|
||||
|
||||
/* The match is over. We generate the SEARCH_POSTED_Q_END here,
|
||||
* before going into the mca_pml_bfo_check_cantmatch_for_match so
|
||||
* we can make a difference for the searching time for all
|
||||
* messages.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* release matching lock before processing fragment */
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
if(OPAL_LIKELY(match)) {
|
||||
bytes_received = segments->seg_len - OMPI_PML_BFO_MATCH_HDR_LEN;
|
||||
match->req_recv.req_bytes_packed = bytes_received;
|
||||
|
||||
MCA_PML_BFO_RECV_REQUEST_MATCHED(match, hdr);
|
||||
if(match->req_bytes_expected > 0) {
|
||||
struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS];
|
||||
uint32_t iov_count = 1;
|
||||
|
||||
/*
|
||||
* Make user buffer accessable(defined) before unpacking.
|
||||
*/
|
||||
MEMCHECKER(
|
||||
memchecker_call(&opal_memchecker_base_mem_defined,
|
||||
match->req_recv.req_base.req_addr,
|
||||
match->req_recv.req_base.req_count,
|
||||
match->req_recv.req_base.req_datatype);
|
||||
);
|
||||
|
||||
iov[0].iov_len = bytes_received;
|
||||
iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval +
|
||||
OMPI_PML_BFO_MATCH_HDR_LEN);
|
||||
while (iov_count < num_segments) {
|
||||
bytes_received += segments[iov_count].seg_len;
|
||||
iov[iov_count].iov_len = segments[iov_count].seg_len;
|
||||
iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval);
|
||||
iov_count++;
|
||||
}
|
||||
opal_convertor_unpack( &match->req_recv.req_base.req_convertor,
|
||||
iov,
|
||||
&iov_count,
|
||||
&bytes_received );
|
||||
match->req_bytes_received = bytes_received;
|
||||
/*
|
||||
* Unpacking finished, make the user buffer unaccessable again.
|
||||
*/
|
||||
MEMCHECKER(
|
||||
memchecker_call(&opal_memchecker_base_mem_noaccess,
|
||||
match->req_recv.req_base.req_addr,
|
||||
match->req_recv.req_base.req_count,
|
||||
match->req_recv.req_base.req_datatype);
|
||||
);
|
||||
}
|
||||
|
||||
/* no need to check if complete we know we are.. */
|
||||
/* don't need a rmb as that is for checking */
|
||||
recv_request_pml_complete(match);
|
||||
}
|
||||
return;
|
||||
|
||||
slow_path:
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
#if PML_BFO
|
||||
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
||||
return;
|
||||
}
|
||||
#endif /* PML_BFO */
|
||||
mca_pml_bfo_recv_frag_match(btl, hdr, segments,
|
||||
num_segments, MCA_PML_BFO_HDR_TYPE_MATCH);
|
||||
}
|
||||
|
||||
|
||||
void mca_pml_bfo_recv_frag_callback_rndv(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RNDV);
|
||||
mca_pml_bfo_recv_frag_match(btl, &hdr->hdr_match, segments,
|
||||
des->des_local_count, MCA_PML_BFO_HDR_TYPE_RNDV);
|
||||
return;
|
||||
}
|
||||
|
||||
void mca_pml_bfo_recv_frag_callback_rget(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_RGET);
|
||||
mca_pml_bfo_recv_frag_match(btl, &hdr->hdr_match, segments,
|
||||
des->des_local_count, MCA_PML_BFO_HDR_TYPE_RGET);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void mca_pml_bfo_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_bfo_send_request_t* sendreq;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_ACK);
|
||||
sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_ack.hdr_src_req.pval;
|
||||
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
|
||||
#if PML_BFO
|
||||
MCA_PML_BFO_ERROR_CHECK_ON_ACK_CALLBACK(sendreq);
|
||||
#endif /* PML_BFO */
|
||||
|
||||
/* if the request should be delivered entirely by copy in/out
|
||||
* then throttle sends */
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_NORDMA)
|
||||
sendreq->req_throttle_sends = true;
|
||||
|
||||
mca_pml_bfo_send_request_copy_in_out(sendreq,
|
||||
hdr->hdr_ack.hdr_send_offset,
|
||||
sendreq->req_send.req_bytes_packed -
|
||||
hdr->hdr_ack.hdr_send_offset);
|
||||
|
||||
if (sendreq->req_state != 0) {
|
||||
/* Typical receipt of an ACK message causes req_state to be
|
||||
* decremented. However, a send request that started as an
|
||||
* RGET request can become a RNDV. For example, when the
|
||||
* receiver determines that its receive buffer is not
|
||||
* contiguous and therefore cannot support the RGET
|
||||
* protocol. A send request that started with the RGET
|
||||
* protocol has req_state == 0 and as such should not be
|
||||
* decremented.
|
||||
*/
|
||||
OPAL_THREAD_ADD_FETCH32(&sendreq->req_state, -1);
|
||||
}
|
||||
|
||||
if(send_request_pml_complete_check(sendreq) == false)
|
||||
mca_pml_bfo_send_request_schedule(sendreq);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void mca_pml_bfo_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata ) {
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_bfo_recv_request_t* recvreq;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FRAG);
|
||||
recvreq = (mca_pml_bfo_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
|
||||
#if PML_BFO
|
||||
MCA_PML_BFO_ERROR_CHECK_ON_FRAG_CALLBACK(recvreq);
|
||||
#endif /* PML_BFO */
|
||||
mca_pml_bfo_recv_request_progress_frag(recvreq,btl,segments,des->des_local_count);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void mca_pml_bfo_recv_frag_callback_put(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata ) {
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_bfo_send_request_t* sendreq;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_PUT);
|
||||
sendreq = (mca_pml_bfo_send_request_t*)hdr->hdr_rdma.hdr_req.pval;
|
||||
#if PML_BFO
|
||||
MCA_PML_BFO_ERROR_CHECK_ON_PUT_CALLBACK(sendreq);
|
||||
#endif /* PML_BFO */
|
||||
mca_pml_bfo_send_request_put(sendreq,btl,&hdr->hdr_rdma);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void mca_pml_bfo_recv_frag_callback_fin(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata ) {
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_bfo_hdr_t* hdr = (mca_pml_bfo_hdr_t*)segments->seg_addr.pval;
|
||||
mca_btl_base_descriptor_t* rdma;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_bfo_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
bfo_hdr_ntoh(hdr, MCA_PML_BFO_HDR_TYPE_FIN);
|
||||
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
|
||||
#if PML_BFO
|
||||
if (true == mca_pml_bfo_is_duplicate_fin(hdr, rdma, btl)) {
|
||||
return;
|
||||
}
|
||||
#endif /* PML_BFO */
|
||||
rdma->des_cbfunc(btl, NULL, rdma,
|
||||
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define PML_MAX_SEQ ~((mca_pml_sequence_t)0);
|
||||
|
||||
static inline mca_pml_bfo_recv_request_t* get_posted_recv(opal_list_t *queue)
|
||||
{
|
||||
if(opal_list_get_size(queue) == 0)
|
||||
return NULL;
|
||||
|
||||
return (mca_pml_bfo_recv_request_t*)opal_list_get_first(queue);
|
||||
}
|
||||
|
||||
static inline mca_pml_bfo_recv_request_t* get_next_posted_recv(
|
||||
opal_list_t *queue,
|
||||
mca_pml_bfo_recv_request_t* req)
|
||||
{
|
||||
opal_list_item_t *i = opal_list_get_next((opal_list_item_t*)req);
|
||||
|
||||
if(opal_list_get_end(queue) == i)
|
||||
return NULL;
|
||||
|
||||
return (mca_pml_bfo_recv_request_t*)i;
|
||||
}
|
||||
|
||||
static mca_pml_bfo_recv_request_t *match_incomming(
|
||||
mca_pml_bfo_match_hdr_t *hdr, mca_pml_bfo_comm_t *comm,
|
||||
mca_pml_bfo_comm_proc_t *proc)
|
||||
{
|
||||
mca_pml_bfo_recv_request_t *specific_recv, *wild_recv;
|
||||
mca_pml_sequence_t wild_recv_seq, specific_recv_seq;
|
||||
int tag = hdr->hdr_tag;
|
||||
|
||||
specific_recv = get_posted_recv(&proc->specific_receives);
|
||||
wild_recv = get_posted_recv(&comm->wild_receives);
|
||||
|
||||
wild_recv_seq = wild_recv ?
|
||||
wild_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ;
|
||||
specific_recv_seq = specific_recv ?
|
||||
specific_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ;
|
||||
|
||||
/* they are equal only if both are PML_MAX_SEQ */
|
||||
while(wild_recv_seq != specific_recv_seq) {
|
||||
mca_pml_bfo_recv_request_t **match;
|
||||
opal_list_t *queue;
|
||||
int req_tag;
|
||||
mca_pml_sequence_t *seq;
|
||||
|
||||
if (OPAL_UNLIKELY(wild_recv_seq < specific_recv_seq)) {
|
||||
match = &wild_recv;
|
||||
queue = &comm->wild_receives;
|
||||
seq = &wild_recv_seq;
|
||||
} else {
|
||||
match = &specific_recv;
|
||||
queue = &proc->specific_receives;
|
||||
seq = &specific_recv_seq;
|
||||
}
|
||||
|
||||
req_tag = (*match)->req_recv.req_base.req_tag;
|
||||
if(req_tag == tag || (req_tag == OMPI_ANY_TAG && tag >= 0)) {
|
||||
opal_list_remove_item(queue, (opal_list_item_t*)(*match));
|
||||
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
|
||||
&((*match)->req_recv.req_base), PERUSE_RECV);
|
||||
return *match;
|
||||
}
|
||||
|
||||
*match = get_next_posted_recv(queue, *match);
|
||||
*seq = (*match) ? (*match)->req_recv.req_base.req_sequence : PML_MAX_SEQ;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static mca_pml_bfo_recv_request_t*
|
||||
match_one(mca_btl_base_module_t *btl,
|
||||
mca_pml_bfo_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
|
||||
size_t num_segments, ompi_communicator_t *comm_ptr,
|
||||
mca_pml_bfo_comm_proc_t *proc,
|
||||
mca_pml_bfo_recv_frag_t* frag)
|
||||
{
|
||||
mca_pml_bfo_recv_request_t *match;
|
||||
mca_pml_bfo_comm_t *comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm;
|
||||
|
||||
do {
|
||||
match = match_incomming(hdr, comm, proc);
|
||||
|
||||
/* if match found, process data */
|
||||
if(OPAL_LIKELY(NULL != match)) {
|
||||
match->req_recv.req_base.req_proc = proc->ompi_proc;
|
||||
|
||||
if(OPAL_UNLIKELY(MCA_PML_REQUEST_PROBE == match->req_recv.req_base.req_type)) {
|
||||
/* complete the probe */
|
||||
mca_pml_bfo_recv_request_matched_probe(match, btl, segments,
|
||||
num_segments);
|
||||
/* attempt to match actual request */
|
||||
continue;
|
||||
} else if (MCA_PML_REQUEST_MPROBE == match->req_recv.req_base.req_type) {
|
||||
/* create a receive frag and associate it with the
|
||||
request, which is then completed so that it can be
|
||||
restarted later during mrecv */
|
||||
mca_pml_bfo_recv_frag_t *tmp;
|
||||
if(NULL == frag) {
|
||||
MCA_PML_BFO_RECV_FRAG_ALLOC(tmp);
|
||||
MCA_PML_BFO_RECV_FRAG_INIT(tmp, hdr, segments, num_segments, btl);
|
||||
} else {
|
||||
tmp = frag;
|
||||
}
|
||||
|
||||
match->req_recv.req_base.req_addr = tmp;
|
||||
mca_pml_bfo_recv_request_matched_probe(match, btl, segments,
|
||||
num_segments);
|
||||
/* this frag is already processed, so we want to break out
|
||||
of the loop and not end up back on the unexpected queue. */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_MSG_MATCH_POSTED_REQ,
|
||||
&(match->req_recv.req_base), PERUSE_RECV);
|
||||
return match;
|
||||
}
|
||||
|
||||
/* if no match found, place on unexpected queue */
|
||||
append_frag_to_list(&proc->unexpected_frags, btl, hdr, segments,
|
||||
num_segments, frag);
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
return NULL;
|
||||
} while(true);
|
||||
}
|
||||
|
||||
static mca_pml_bfo_recv_frag_t* check_cantmatch_for_match(mca_pml_bfo_comm_proc_t *proc)
|
||||
{
|
||||
mca_pml_bfo_recv_frag_t *frag;
|
||||
|
||||
/* search the list for a fragment from the send with sequence
|
||||
* number next_msg_seq_expected
|
||||
*/
|
||||
for(frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_first(&proc->frags_cant_match);
|
||||
frag != (mca_pml_bfo_recv_frag_t*)opal_list_get_end(&proc->frags_cant_match);
|
||||
frag = (mca_pml_bfo_recv_frag_t*)opal_list_get_next(frag))
|
||||
{
|
||||
mca_pml_bfo_match_hdr_t* hdr = &frag->hdr.hdr_match;
|
||||
/*
|
||||
* If the message has the next expected seq from that proc...
|
||||
*/
|
||||
if(hdr->hdr_seq != proc->expected_sequence)
|
||||
continue;
|
||||
|
||||
opal_list_remove_item(&proc->frags_cant_match, (opal_list_item_t*)frag);
|
||||
return frag;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* RCS/CTS receive side matching
|
||||
*
|
||||
* @param hdr list of parameters needed for matching
|
||||
* This list is also embeded in frag,
|
||||
* but this allows to save a memory copy when
|
||||
* a match is made in this routine. (IN)
|
||||
* @param frag pointer to receive fragment which we want
|
||||
* to match (IN/OUT). If a match is not made,
|
||||
* hdr is copied to frag.
|
||||
* @param match_made parameter indicating if we matched frag/
|
||||
* hdr (OUT)
|
||||
* @param additional_matches if a match is made with frag, we
|
||||
* may be able to match fragments that previously
|
||||
* have arrived out-of-order. If this is the
|
||||
* case, the associated fragment descriptors are
|
||||
* put on this list for further processing. (OUT)
|
||||
*
|
||||
* @return OMPI error code
|
||||
*
|
||||
* This routine is used to try and match a newly arrived message fragment
|
||||
* to pre-posted receives. The following assumptions are made
|
||||
* - fragments are received out of order
|
||||
* - for long messages, e.g. more than one fragment, a RTS/CTS algorithm
|
||||
* is used.
|
||||
* - 2nd and greater fragments include a receive descriptor pointer
|
||||
* - fragments may be dropped
|
||||
* - fragments may be corrupt
|
||||
* - this routine may be called simultaneously by more than one thread
|
||||
*/
|
||||
static int mca_pml_bfo_recv_frag_match( mca_btl_base_module_t *btl,
|
||||
mca_pml_bfo_match_hdr_t *hdr,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments,
|
||||
int type)
|
||||
{
|
||||
/* local variables */
|
||||
uint16_t next_msg_seq_expected, frag_msg_seq;
|
||||
ompi_communicator_t *comm_ptr;
|
||||
mca_pml_bfo_recv_request_t *match = NULL;
|
||||
mca_pml_bfo_comm_t *comm;
|
||||
mca_pml_bfo_comm_proc_t *proc;
|
||||
mca_pml_bfo_recv_frag_t* frag = NULL;
|
||||
|
||||
/* communicator pointer */
|
||||
comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
|
||||
if(OPAL_UNLIKELY(NULL == comm_ptr)) {
|
||||
/* This is a special case. A message for a not yet existing
|
||||
* communicator can happens. Instead of doing a matching we
|
||||
* will temporarily add it the a pending queue in the PML.
|
||||
* Later on, when the communicator is completely instantiated,
|
||||
* this pending queue will be searched and all matching fragments
|
||||
* moved to the right communicator.
|
||||
*/
|
||||
append_frag_to_list( &mca_pml_bfo.non_existing_communicator_pending,
|
||||
btl, hdr, segments, num_segments, NULL );
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
comm = (mca_pml_bfo_comm_t *)comm_ptr->c_pml_comm;
|
||||
|
||||
/* source sequence number */
|
||||
frag_msg_seq = hdr->hdr_seq;
|
||||
proc = &comm->procs[hdr->hdr_src];
|
||||
|
||||
/**
|
||||
* We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
|
||||
* fragment arrival. Independing if it is received on the correct order or not.
|
||||
* This will allow the tools to figure out if the messages are not received in the
|
||||
* correct order (if multiple network interfaces).
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* get next expected message sequence number - if threaded
|
||||
* run, lock to make sure that if another thread is processing
|
||||
* a frag from the same message a match is made only once.
|
||||
* Also, this prevents other posted receives (for a pair of
|
||||
* end points) from being processed, and potentially "loosing"
|
||||
* the fragment.
|
||||
*/
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
|
||||
#if PML_BFO
|
||||
if(OPAL_UNLIKELY(hdr->hdr_common.hdr_flags & MCA_PML_BFO_HDR_FLAGS_RESTART)) {
|
||||
if (NULL == (match = mca_pml_bfo_get_request(hdr))) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
} else {
|
||||
#endif /* PML_BFO */
|
||||
/* get sequence number of next message that can be processed */
|
||||
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
|
||||
if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected))
|
||||
goto wrong_seq;
|
||||
|
||||
/*
|
||||
* This is the sequence number we were expecting,
|
||||
* so we can try matching it to already posted
|
||||
* receives.
|
||||
*/
|
||||
|
||||
out_of_order_match:
|
||||
/* We're now expecting the next sequence number. */
|
||||
proc->expected_sequence++;
|
||||
|
||||
/**
|
||||
* We generate the SEARCH_POSTED_QUEUE only when the message is received
|
||||
* in the correct sequence. Otherwise, we delay the event generation until
|
||||
* we reach the correct sequence number.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag);
|
||||
|
||||
/**
|
||||
* The match is over. We generate the SEARCH_POSTED_Q_END here, before going
|
||||
* into the mca_pml_bfo_check_cantmatch_for_match so we can make a difference
|
||||
* for the searching time for all messages.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* release matching lock before processing fragment */
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
#if PML_BFO
|
||||
}
|
||||
#endif /* PML_BFO */
|
||||
if(OPAL_LIKELY(match)) {
|
||||
switch(type) {
|
||||
case MCA_PML_BFO_HDR_TYPE_MATCH:
|
||||
mca_pml_bfo_recv_request_progress_match(match, btl, segments, num_segments);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_RNDV:
|
||||
mca_pml_bfo_recv_request_progress_rndv(match, btl, segments, num_segments);
|
||||
break;
|
||||
case MCA_PML_BFO_HDR_TYPE_RGET:
|
||||
mca_pml_bfo_recv_request_progress_rget(match, btl, segments, num_segments);
|
||||
break;
|
||||
}
|
||||
|
||||
if(OPAL_UNLIKELY(frag))
|
||||
MCA_PML_BFO_RECV_FRAG_RETURN(frag);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that new message has arrived, check to see if
|
||||
* any fragments on the c_c_frags_cant_match list
|
||||
* may now be used to form new matchs
|
||||
*/
|
||||
if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) {
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
if((frag = check_cantmatch_for_match(proc))) {
|
||||
hdr = &frag->hdr.hdr_match;
|
||||
segments = frag->segments;
|
||||
num_segments = frag->num_segments;
|
||||
btl = frag->btl;
|
||||
type = hdr->hdr_common.hdr_type;
|
||||
goto out_of_order_match;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
wrong_seq:
|
||||
/*
|
||||
* This message comes after the next expected, so it
|
||||
* is ahead of sequence. Save it for later.
|
||||
*/
|
||||
#if PML_BFO
|
||||
if (true == mca_pml_bfo_is_duplicate_msg(proc, hdr)) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#endif /* PML_BFO */
|
||||
append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments,
|
||||
num_segments, NULL);
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,172 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_BFO_RECVFRAG_H
|
||||
#define MCA_PML_BFO_RECVFRAG_H
|
||||
|
||||
#include "pml_bfo_hdr.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_pml_bfo_buffer_t {
|
||||
size_t len;
|
||||
void * addr;
|
||||
};
|
||||
typedef struct mca_pml_bfo_buffer_t mca_pml_bfo_buffer_t;
|
||||
|
||||
|
||||
struct mca_pml_bfo_recv_frag_t {
|
||||
opal_free_list_item_t super;
|
||||
mca_pml_bfo_hdr_t hdr;
|
||||
size_t num_segments;
|
||||
mca_btl_base_module_t* btl;
|
||||
mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS];
|
||||
mca_pml_bfo_buffer_t buffers[MCA_BTL_DES_MAX_SEGMENTS];
|
||||
unsigned char addr[1];
|
||||
};
|
||||
typedef struct mca_pml_bfo_recv_frag_t mca_pml_bfo_recv_frag_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_frag_t);
|
||||
|
||||
|
||||
#define MCA_PML_BFO_RECV_FRAG_ALLOC(frag) \
|
||||
do { \
|
||||
opal_free_list_item_t* item; \
|
||||
OPAL_FREE_LIST_WAIT_MT(&mca_pml_bfo.recv_frags, item); \
|
||||
frag = (mca_pml_bfo_recv_frag_t*)item; \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define MCA_PML_BFO_RECV_FRAG_INIT(frag, hdr, segs, cnt, btl ) \
|
||||
do { \
|
||||
size_t i, _size; \
|
||||
mca_btl_base_segment_t* macro_segments = frag->segments; \
|
||||
mca_pml_bfo_buffer_t* buffers = frag->buffers; \
|
||||
unsigned char* _ptr = (unsigned char*)frag->addr; \
|
||||
/* init recv_frag */ \
|
||||
frag->btl = btl; \
|
||||
frag->hdr = *(mca_pml_bfo_hdr_t*)hdr; \
|
||||
frag->num_segments = 1; \
|
||||
_size = segs[0].seg_len; \
|
||||
for( i = 1; i < cnt; i++ ) { \
|
||||
_size += segs[i].seg_len; \
|
||||
} \
|
||||
/* copy over data */ \
|
||||
if(_size <= mca_pml_bfo.unexpected_limit ) { \
|
||||
macro_segments[0].seg_addr.pval = frag->addr; \
|
||||
} else { \
|
||||
buffers[0].len = _size; \
|
||||
buffers[0].addr = (char*) \
|
||||
mca_pml_bfo.allocator->alc_alloc( mca_pml_bfo.allocator, \
|
||||
buffers[0].len, \
|
||||
0, NULL); \
|
||||
_ptr = (unsigned char*)(buffers[0].addr); \
|
||||
macro_segments[0].seg_addr.pval = buffers[0].addr; \
|
||||
} \
|
||||
macro_segments[0].seg_len = _size; \
|
||||
for( i = 0; i < cnt; i++ ) { \
|
||||
memcpy( _ptr, segs[i].seg_addr.pval, segs[i].seg_len); \
|
||||
_ptr += segs[i].seg_len; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define MCA_PML_BFO_RECV_FRAG_RETURN(frag) \
|
||||
do { \
|
||||
if( frag->segments[0].seg_len > mca_pml_bfo.unexpected_limit ) { \
|
||||
/* return buffers */ \
|
||||
mca_pml_bfo.allocator->alc_free( mca_pml_bfo.allocator, \
|
||||
frag->buffers[0].addr ); \
|
||||
} \
|
||||
frag->num_segments = 0; \
|
||||
\
|
||||
/* return recv_frag */ \
|
||||
OPAL_FREE_LIST_RETURN(&mca_pml_bfo.recv_frags, \
|
||||
(opal_free_list_item_t*)frag); \
|
||||
} while(0)
|
||||
|
||||
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (match).
|
||||
*/
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_match( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (rndv).
|
||||
*/
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_rndv( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (rget).
|
||||
*/
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_rget( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (ack).
|
||||
*/
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_ack( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (frag).
|
||||
*/
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_frag( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (put).
|
||||
*/
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_put( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (fin).
|
||||
*/
|
||||
|
||||
extern void mca_pml_bfo_recv_frag_callback_fin( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,449 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef OMPI_PML_BFO_RECV_REQUEST_H
|
||||
#define OMPI_PML_BFO_RECV_REQUEST_H
|
||||
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_rdma.h"
|
||||
#include "pml_bfo_rdmafrag.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/pml/bfo/pml_bfo_comm.h"
|
||||
#include "opal/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/pml/base/pml_base_recvreq.h"
|
||||
#if PML_BFO
|
||||
#define RECVREQ_RECVERRSENT 0x01
|
||||
#define RECVREQ_RNDVRESTART_RECVED 0x02
|
||||
#define RECVREQ_RNDVRESTART_ACKED 0x04
|
||||
#endif /* PML_BFO */
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_pml_bfo_recv_request_t {
|
||||
mca_pml_base_recv_request_t req_recv;
|
||||
opal_ptr_t remote_req_send;
|
||||
#if PML_BFO
|
||||
int32_t req_msgseq; /* PML sequence number */
|
||||
int32_t req_events; /* number of outstanding events on request */
|
||||
int32_t req_restartseq; /* sequence number of restarted request */
|
||||
int32_t req_errstate; /* state of request if in error */
|
||||
#endif /* PML_BFO */
|
||||
int32_t req_lock;
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_received; /**< amount of data transferred into the user buffer */
|
||||
size_t req_bytes_expected; /**< local size of the data as suggested by the user */
|
||||
size_t req_rdma_offset;
|
||||
size_t req_send_offset;
|
||||
uint32_t req_rdma_cnt;
|
||||
uint32_t req_rdma_idx;
|
||||
bool req_pending;
|
||||
bool req_ack_sent; /**< whether ack was sent to the sender */
|
||||
bool req_match_received; /**< Prevent request to be completed prematurely */
|
||||
opal_mutex_t lock;
|
||||
mca_pml_bfo_com_btl_t req_rdma[1];
|
||||
};
|
||||
typedef struct mca_pml_bfo_recv_request_t mca_pml_bfo_recv_request_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_bfo_recv_request_t);
|
||||
|
||||
static inline bool lock_recv_request(mca_pml_bfo_recv_request_t *recvreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1;
|
||||
}
|
||||
|
||||
static inline bool unlock_recv_request(mca_pml_bfo_recv_request_t *recvreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate a recv request from the modules free list.
|
||||
*
|
||||
* @param rc (OUT) OMPI_SUCCESS or error status on failure.
|
||||
* @return Receive request.
|
||||
*/
|
||||
#define MCA_PML_BFO_RECV_REQUEST_ALLOC(recvreq) \
|
||||
do { \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_GET_MT(&mca_pml_base_recv_requests, item); \
|
||||
recvreq = (mca_pml_bfo_recv_request_t*)item; \
|
||||
} while(0)
|
||||
|
||||
|
||||
/**
|
||||
* Initialize a receive request with call parameters.
|
||||
*
|
||||
* @param request (IN) Receive request.
|
||||
* @param addr (IN) User buffer.
|
||||
* @param count (IN) Number of elements of indicated datatype.
|
||||
* @param datatype (IN) User defined datatype.
|
||||
* @param src (IN) Source rank w/in the communicator.
|
||||
* @param tag (IN) User defined tag.
|
||||
* @param comm (IN) Communicator.
|
||||
* @param persistent (IN) Is this a ersistent request.
|
||||
*/
|
||||
#define MCA_PML_BFO_RECV_REQUEST_INIT( request, \
|
||||
addr, \
|
||||
count, \
|
||||
datatype, \
|
||||
src, \
|
||||
tag, \
|
||||
comm, \
|
||||
persistent) \
|
||||
do { \
|
||||
MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv, \
|
||||
addr, \
|
||||
count, \
|
||||
datatype, \
|
||||
src, \
|
||||
tag, \
|
||||
comm, \
|
||||
persistent); \
|
||||
} while(0)
|
||||
|
||||
/**
|
||||
* Mark the request as completed at MPI level for internal purposes.
|
||||
*
|
||||
* @param recvreq (IN) Receive request.
|
||||
*/
|
||||
#define MCA_PML_BFO_RECV_REQUEST_MPI_COMPLETE( recvreq ) \
|
||||
do { \
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
|
||||
&(recvreq->req_recv.req_base), PERUSE_RECV ); \
|
||||
ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Free the PML receive request
|
||||
*/
|
||||
#define MCA_PML_BFO_RECV_REQUEST_RETURN(recvreq) \
|
||||
{ \
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \
|
||||
OPAL_FREE_LIST_RETURN( &mca_pml_base_recv_requests, \
|
||||
(opal_free_list_item_t*)(recvreq)); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Complete receive request. Request structure cannot be accessed after calling
|
||||
* this function any more.
|
||||
*
|
||||
* @param recvreq (IN) Receive request.
|
||||
*/
|
||||
static inline void
|
||||
recv_request_pml_complete(mca_pml_bfo_recv_request_t *recvreq)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
if(false == recvreq->req_recv.req_base.req_pml_complete) {
|
||||
|
||||
if(recvreq->req_recv.req_bytes_packed > 0) {
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
|
||||
&recvreq->req_recv.req_base, PERUSE_RECV );
|
||||
}
|
||||
|
||||
for(i = 0; i < recvreq->req_rdma_cnt; i++) {
|
||||
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[i].btl_reg;
|
||||
if( NULL != btl_reg && btl_reg->mpool != NULL) {
|
||||
btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg );
|
||||
}
|
||||
}
|
||||
recvreq->req_rdma_cnt = 0;
|
||||
#if PML_BFO
|
||||
recvreq->req_msgseq -= 100;
|
||||
#endif /* PML_BFO */
|
||||
|
||||
if(true == recvreq->req_recv.req_base.req_free_called) {
|
||||
if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) {
|
||||
ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST);
|
||||
}
|
||||
MCA_PML_BFO_RECV_REQUEST_RETURN(recvreq);
|
||||
} else {
|
||||
/* initialize request status */
|
||||
recvreq->req_recv.req_base.req_pml_complete = true;
|
||||
recvreq->req_recv.req_base.req_ompi.req_status._ucount =
|
||||
recvreq->req_bytes_received;
|
||||
if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) {
|
||||
recvreq->req_recv.req_base.req_ompi.req_status._ucount =
|
||||
recvreq->req_recv.req_bytes_packed;
|
||||
recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR =
|
||||
MPI_ERR_TRUNCATE;
|
||||
}
|
||||
MCA_PML_BFO_RECV_REQUEST_MPI_COMPLETE(recvreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
recv_request_pml_complete_check(mca_pml_bfo_recv_request_t *recvreq)
|
||||
{
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
opal_atomic_rmb();
|
||||
#endif
|
||||
if(recvreq->req_match_received &&
|
||||
recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed &&
|
||||
#if PML_BFO
|
||||
(0 == recvreq->req_events) && lock_recv_request(recvreq)) {
|
||||
#else /* PML_BFO */
|
||||
lock_recv_request(recvreq)) {
|
||||
#endif /* PML_BFO */
|
||||
recv_request_pml_complete(recvreq);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
extern void mca_pml_bfo_recv_req_start(mca_pml_bfo_recv_request_t *req);
|
||||
#define MCA_PML_BFO_RECV_REQUEST_START(r) mca_pml_bfo_recv_req_start(r)
|
||||
|
||||
static inline void prepare_recv_req_converter(mca_pml_bfo_recv_request_t *req)
|
||||
{
|
||||
if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) {
|
||||
opal_convertor_copy_and_prepare_for_recv(
|
||||
req->req_recv.req_base.req_proc->super.proc_convertor,
|
||||
&(req->req_recv.req_base.req_datatype->super),
|
||||
req->req_recv.req_base.req_count,
|
||||
req->req_recv.req_base.req_addr,
|
||||
0,
|
||||
&req->req_recv.req_base.req_convertor);
|
||||
opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor,
|
||||
&req->req_bytes_expected);
|
||||
}
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_RECV_REQUEST_MATCHED(request, hdr) \
|
||||
recv_req_matched(request, hdr)
|
||||
|
||||
static inline void recv_req_matched(mca_pml_bfo_recv_request_t *req,
|
||||
mca_pml_bfo_match_hdr_t *hdr)
|
||||
{
|
||||
req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;
|
||||
req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
|
||||
req->req_match_received = true;
|
||||
#if PML_BFO
|
||||
req->req_msgseq = hdr->hdr_seq;
|
||||
#endif /* PML_BFO */
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
opal_atomic_wmb();
|
||||
#endif
|
||||
if(req->req_recv.req_bytes_packed > 0) {
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
if(MPI_ANY_SOURCE == req->req_recv.req_base.req_peer) {
|
||||
/* non wildcard prepared during post recv */
|
||||
prepare_recv_req_converter(req);
|
||||
}
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
|
||||
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN,
|
||||
&req->req_recv.req_base, PERUSE_RECV);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
#define MCA_PML_BFO_RECV_REQUEST_UNPACK( request, \
|
||||
segments, \
|
||||
num_segments, \
|
||||
seg_offset, \
|
||||
data_offset, \
|
||||
bytes_received, \
|
||||
bytes_delivered) \
|
||||
do { \
|
||||
bytes_delivered = 0; \
|
||||
if(request->req_recv.req_bytes_packed > 0) { \
|
||||
struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; \
|
||||
uint32_t iov_count = 0; \
|
||||
size_t max_data = bytes_received; \
|
||||
size_t n, offset = seg_offset; \
|
||||
mca_btl_base_segment_t* segment = segments; \
|
||||
\
|
||||
OPAL_THREAD_LOCK(&request->lock); \
|
||||
for( n = 0; n < num_segments; n++, segment++ ) { \
|
||||
if(offset >= segment->seg_len) { \
|
||||
offset -= segment->seg_len; \
|
||||
} else { \
|
||||
iov[iov_count].iov_len = segment->seg_len - offset; \
|
||||
iov[iov_count].iov_base = (IOVBASE_TYPE*) \
|
||||
((unsigned char*)segment->seg_addr.pval + offset); \
|
||||
iov_count++; \
|
||||
offset = 0; \
|
||||
} \
|
||||
} \
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE, \
|
||||
&(recvreq->req_recv.req_base), max_data, \
|
||||
PERUSE_RECV); \
|
||||
opal_convertor_set_position( &(request->req_recv.req_base.req_convertor), \
|
||||
&data_offset ); \
|
||||
opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor, \
|
||||
iov, \
|
||||
&iov_count, \
|
||||
&max_data ); \
|
||||
bytes_delivered = max_data; \
|
||||
OPAL_THREAD_UNLOCK(&request->lock); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_bfo_recv_request_progress_match(
|
||||
mca_pml_bfo_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_bfo_recv_request_progress_frag(
|
||||
mca_pml_bfo_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_bfo_recv_request_progress_rndv(
|
||||
mca_pml_bfo_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_bfo_recv_request_progress_rget(
|
||||
mca_pml_bfo_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_bfo_recv_request_matched_probe(
|
||||
mca_pml_bfo_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
int mca_pml_bfo_recv_request_schedule_once(
|
||||
mca_pml_bfo_recv_request_t* req, mca_bml_base_btl_t* start_bml_btl);
|
||||
|
||||
static inline int mca_pml_bfo_recv_request_schedule_exclusive(
|
||||
mca_pml_bfo_recv_request_t* req,
|
||||
mca_bml_base_btl_t* start_bml_btl)
|
||||
{
|
||||
int rc;
|
||||
|
||||
do {
|
||||
rc = mca_pml_bfo_recv_request_schedule_once(req, start_bml_btl);
|
||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
|
||||
break;
|
||||
} while(!unlock_recv_request(req));
|
||||
|
||||
if(OMPI_SUCCESS == rc)
|
||||
recv_request_pml_complete_check(req);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline void mca_pml_bfo_recv_request_schedule(
|
||||
mca_pml_bfo_recv_request_t* req,
|
||||
mca_bml_base_btl_t* start_bml_btl)
|
||||
{
|
||||
if(!lock_recv_request(req))
|
||||
return;
|
||||
|
||||
(void)mca_pml_bfo_recv_request_schedule_exclusive(req, start_bml_btl);
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_ADD_ACK_TO_PENDING(P, S, D, O) \
|
||||
do { \
|
||||
mca_pml_bfo_pckt_pending_t *_pckt; \
|
||||
\
|
||||
MCA_PML_BFO_PCKT_PENDING_ALLOC(_pckt); \
|
||||
_pckt->hdr.hdr_common.hdr_type = MCA_PML_BFO_HDR_TYPE_ACK; \
|
||||
_pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \
|
||||
_pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \
|
||||
_pckt->hdr.hdr_ack.hdr_send_offset = (O); \
|
||||
_pckt->proc = (P); \
|
||||
_pckt->bml_btl = NULL; \
|
||||
OPAL_THREAD_LOCK(&mca_pml_bfo.lock); \
|
||||
opal_list_append(&mca_pml_bfo.pckt_pending, \
|
||||
(opal_list_item_t*)_pckt); \
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock); \
|
||||
} while(0)
|
||||
|
||||
int mca_pml_bfo_recv_request_ack_send_btl(ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
|
||||
uint64_t hdr_rdma_offset, bool nordma);
|
||||
|
||||
static inline int mca_pml_bfo_recv_request_ack_send(ompi_proc_t* proc,
|
||||
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
|
||||
bool nordma)
|
||||
{
|
||||
size_t i;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
mca_bml_base_endpoint_t* endpoint =
|
||||
(mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
|
||||
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
if(mca_pml_bfo_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
|
||||
hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
MCA_PML_BFO_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,
|
||||
hdr_send_offset);
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
int mca_pml_bfo_recv_request_get_frag(mca_pml_bfo_rdma_frag_t* frag);
|
||||
|
||||
/* This function tries to continue recvreq that stuck due to resource
|
||||
* unavailability. Recvreq is added to recv_pending list if scheduling of put
|
||||
* operation cannot be accomplished for some reason. */
|
||||
void mca_pml_bfo_recv_request_process_pending(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,499 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef OMPI_PML_BFO_SEND_REQUEST_H
|
||||
#define OMPI_PML_BFO_SEND_REQUEST_H
|
||||
|
||||
#include "opal/mca/btl/btl.h"
|
||||
#include "opal/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
||||
#include "pml_bfo_comm.h"
|
||||
#include "pml_bfo_hdr.h"
|
||||
#include "pml_bfo_rdma.h"
|
||||
#include "pml_bfo_rdmafrag.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef enum {
|
||||
MCA_PML_BFO_SEND_PENDING_NONE,
|
||||
MCA_PML_BFO_SEND_PENDING_SCHEDULE,
|
||||
MCA_PML_BFO_SEND_PENDING_START
|
||||
} mca_pml_bfo_send_pending_t;
|
||||
|
||||
struct mca_pml_bfo_send_request_t {
|
||||
mca_pml_base_send_request_t req_send;
|
||||
mca_bml_base_endpoint_t* req_endpoint;
|
||||
opal_ptr_t req_recv;
|
||||
#if PML_BFO
|
||||
int32_t req_events; /* number of outstanding events on request */
|
||||
int32_t req_restartseq; /* sequence number of restarted request */
|
||||
int32_t req_restart; /* state of restarted request */
|
||||
int32_t req_error; /* non-zero when error has occurred on request */
|
||||
#endif /* PML_BFO */
|
||||
int32_t req_state;
|
||||
int32_t req_lock;
|
||||
bool req_throttle_sends;
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_delivered;
|
||||
uint32_t req_rdma_cnt;
|
||||
mca_pml_bfo_send_pending_t req_pending;
|
||||
opal_mutex_t req_send_range_lock;
|
||||
opal_list_t req_send_ranges;
|
||||
mca_pml_bfo_com_btl_t req_rdma[1];
|
||||
};
|
||||
typedef struct mca_pml_bfo_send_request_t mca_pml_bfo_send_request_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_bfo_send_request_t);
|
||||
|
||||
struct mca_pml_bfo_send_range_t {
|
||||
opal_free_list_item_t base;
|
||||
uint64_t range_send_offset;
|
||||
uint64_t range_send_length;
|
||||
int range_btl_idx;
|
||||
int range_btl_cnt;
|
||||
mca_pml_bfo_com_btl_t range_btls[1];
|
||||
};
|
||||
typedef struct mca_pml_bfo_send_range_t mca_pml_bfo_send_range_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pml_bfo_send_range_t);
|
||||
|
||||
static inline bool lock_send_request(mca_pml_bfo_send_request_t *sendreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, 1) == 1;
|
||||
}
|
||||
|
||||
static inline bool unlock_send_request(mca_pml_bfo_send_request_t *sendreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD_FETCH32(&sendreq->req_lock, -1) == 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
add_request_to_send_pending(mca_pml_bfo_send_request_t* sendreq,
|
||||
const mca_pml_bfo_send_pending_t type,
|
||||
const bool append)
|
||||
{
|
||||
opal_list_item_t *item = (opal_list_item_t*)sendreq;
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_pml_bfo.lock);
|
||||
sendreq->req_pending = type;
|
||||
if(append)
|
||||
opal_list_append(&mca_pml_bfo.send_pending, item);
|
||||
else
|
||||
opal_list_prepend(&mca_pml_bfo.send_pending, item);
|
||||
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock);
|
||||
}
|
||||
|
||||
static inline mca_pml_bfo_send_request_t*
|
||||
get_request_from_send_pending(mca_pml_bfo_send_pending_t *type)
|
||||
{
|
||||
mca_pml_bfo_send_request_t *sendreq;
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_pml_bfo.lock);
|
||||
sendreq = (mca_pml_bfo_send_request_t*)
|
||||
opal_list_remove_first(&mca_pml_bfo.send_pending);
|
||||
if(sendreq) {
|
||||
*type = sendreq->req_pending;
|
||||
sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_bfo.lock);
|
||||
|
||||
return sendreq;
|
||||
}
|
||||
|
||||
#define MCA_PML_BFO_SEND_REQUEST_ALLOC( comm, \
|
||||
dst, \
|
||||
sendreq) \
|
||||
{ \
|
||||
ompi_proc_t *proc = ompi_comm_peer_lookup( comm, dst ); \
|
||||
opal_free_list_item_t* item; \
|
||||
\
|
||||
sendreq = NULL; \
|
||||
if( OPAL_LIKELY(NULL != proc) ) { \
|
||||
OPAL_FREE_LIST_WAIT_MT(&mca_pml_base_send_requests, item); \
|
||||
sendreq = (mca_pml_bfo_send_request_t*)item; \
|
||||
sendreq->req_send.req_base.req_proc = proc; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define MCA_PML_BFO_SEND_REQUEST_INIT( sendreq, \
|
||||
buf, \
|
||||
count, \
|
||||
datatype, \
|
||||
dst, \
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent) \
|
||||
{ \
|
||||
MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
|
||||
buf, \
|
||||
count, \
|
||||
datatype, \
|
||||
dst, \
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent, \
|
||||
0); /* convertor_flags */ \
|
||||
(sendreq)->req_recv.pval = NULL; \
|
||||
}
|
||||
|
||||
|
||||
static inline void mca_pml_bfo_free_rdma_resources(mca_pml_bfo_send_request_t* sendreq)
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* return mpool resources */
|
||||
for(r = 0; r < sendreq->req_rdma_cnt; r++) {
|
||||
struct mca_btl_base_registration_handle_t* handle = sendreq->req_rdma[r].btl_reg;
|
||||
mca_bml_base_btl_t *bml_btl = sendreq->req_rdma[r].bml_btl;
|
||||
|
||||
if( NULL != handle ) {
|
||||
mca_bml_base_deregister_mem (bml_btl, handle);
|
||||
sendreq->req_rdma[r].btl_reg = NULL;
|
||||
}
|
||||
}
|
||||
sendreq->req_rdma_cnt = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Start a send request.
|
||||
*/
|
||||
|
||||
#define MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc) \
|
||||
do { \
|
||||
rc = mca_pml_bfo_send_request_start(sendreq); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*
|
||||
* Mark a send request as completed at the MPI level.
|
||||
*/
|
||||
|
||||
#define MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, with_signal) \
|
||||
do { \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_SOURCE = \
|
||||
(sendreq)->req_send.req_base.req_comm->c_my_rank; \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_TAG = \
|
||||
(sendreq)->req_send.req_base.req_tag; \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status._ucount = \
|
||||
(sendreq)->req_send.req_bytes_packed; \
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND); \
|
||||
\
|
||||
ompi_request_complete( &((sendreq)->req_send.req_base.req_ompi), (with_signal) ); \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
* Release resources associated with a request
|
||||
*/
|
||||
|
||||
#define MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq) \
|
||||
do { \
|
||||
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \
|
||||
OPAL_FREE_LIST_RETURN( &mca_pml_base_send_requests, \
|
||||
(opal_free_list_item_t*)sendreq); \
|
||||
} while(0)
|
||||
|
||||
|
||||
/*
|
||||
* The PML has completed a send request. Note that this request
|
||||
* may have been orphaned by the user or have already completed
|
||||
* at the MPI level.
|
||||
* This function will never be called directly from the upper level, as it
|
||||
* should only be an internal call to the PML.
|
||||
*
|
||||
*/
|
||||
static inline void
|
||||
send_request_pml_complete(mca_pml_bfo_send_request_t *sendreq)
|
||||
{
|
||||
if(false == sendreq->req_send.req_base.req_pml_complete) {
|
||||
if(sendreq->req_send.req_bytes_packed > 0) {
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND);
|
||||
}
|
||||
|
||||
/* return mpool resources */
|
||||
mca_pml_bfo_free_rdma_resources(sendreq);
|
||||
|
||||
if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED &&
|
||||
sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) {
|
||||
mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq);
|
||||
}
|
||||
|
||||
sendreq->req_send.req_base.req_pml_complete = true;
|
||||
|
||||
if( !REQUEST_COMPLETE( &((sendreq->req_send).req_base.req_ompi)) ) {
|
||||
/* Should only be called for long messages (maybe synchronous) */
|
||||
MCA_PML_BFO_SEND_REQUEST_MPI_COMPLETE(sendreq, true);
|
||||
} else {
|
||||
if( MPI_SUCCESS != sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR ) {
|
||||
ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST);
|
||||
}
|
||||
}
|
||||
#if PML_BFO
|
||||
sendreq->req_send.req_base.req_sequence -= 100;
|
||||
#endif /* PML_BFO */
|
||||
|
||||
if(true == sendreq->req_send.req_base.req_free_called) {
|
||||
MCA_PML_BFO_SEND_REQUEST_RETURN(sendreq);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* returns true if request was completed on PML level */
|
||||
static inline bool
|
||||
send_request_pml_complete_check(mca_pml_bfo_send_request_t *sendreq)
|
||||
{
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
opal_atomic_rmb();
|
||||
#endif
|
||||
/* if no more events are expected for the request and the whole message is
|
||||
* already sent and send fragment scheduling isn't running in another
|
||||
* thread then complete the request on PML level. From now on, if user
|
||||
* called free on this request, the request structure can be reused for
|
||||
* another request or if the request is persistent it can be restarted */
|
||||
if(sendreq->req_state == 0 &&
|
||||
sendreq->req_bytes_delivered >= sendreq->req_send.req_bytes_packed
|
||||
&& lock_send_request(sendreq)) {
|
||||
send_request_pml_complete(sendreq);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedule additional fragments
|
||||
*/
|
||||
int
|
||||
mca_pml_bfo_send_request_schedule_once(mca_pml_bfo_send_request_t*);
|
||||
|
||||
static inline int
|
||||
mca_pml_bfo_send_request_schedule_exclusive(mca_pml_bfo_send_request_t* sendreq)
|
||||
{
|
||||
int rc;
|
||||
do {
|
||||
rc = mca_pml_bfo_send_request_schedule_once(sendreq);
|
||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
|
||||
break;
|
||||
} while(!unlock_send_request(sendreq));
|
||||
|
||||
if(OMPI_SUCCESS == rc)
|
||||
send_request_pml_complete_check(sendreq);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline void
|
||||
mca_pml_bfo_send_request_schedule(mca_pml_bfo_send_request_t* sendreq)
|
||||
{
|
||||
/*
|
||||
* Only allow one thread in this routine for a given request.
|
||||
* However, we cannot block callers on a mutex, so simply keep track
|
||||
* of the number of times the routine has been called and run through
|
||||
* the scheduling logic once for every call.
|
||||
*/
|
||||
|
||||
if(!lock_send_request(sendreq))
|
||||
return;
|
||||
|
||||
mca_pml_bfo_send_request_schedule_exclusive(sendreq);
|
||||
}
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
int mca_pml_bfo_send_request_start_cuda(
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
/**
|
||||
* Start the specified request
|
||||
*/
|
||||
|
||||
int mca_pml_bfo_send_request_start_buffered(
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
|
||||
int mca_pml_bfo_send_request_start_copy(
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
|
||||
int mca_pml_bfo_send_request_start_prepare(
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
|
||||
int mca_pml_bfo_send_request_start_rdma(
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
|
||||
int mca_pml_bfo_send_request_start_rndv(
|
||||
mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size,
|
||||
int flags);
|
||||
|
||||
static inline int
|
||||
mca_pml_bfo_send_request_start_btl( mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl )
|
||||
{
|
||||
size_t size = sendreq->req_send.req_bytes_packed;
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_bfo_hdr_t);
|
||||
int rc;
|
||||
|
||||
if( OPAL_LIKELY(size <= eager_limit) ) {
|
||||
switch(sendreq->req_send.req_send_mode) {
|
||||
case MCA_PML_BASE_SEND_SYNCHRONOUS:
|
||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0);
|
||||
break;
|
||||
case MCA_PML_BASE_SEND_BUFFERED:
|
||||
rc = mca_pml_bfo_send_request_start_copy(sendreq, bml_btl, size);
|
||||
break;
|
||||
case MCA_PML_BASE_SEND_COMPLETE:
|
||||
rc = mca_pml_bfo_send_request_start_prepare(sendreq, bml_btl, size);
|
||||
break;
|
||||
default:
|
||||
if (size != 0 && bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) {
|
||||
rc = mca_pml_bfo_send_request_start_prepare(sendreq, bml_btl, size);
|
||||
} else {
|
||||
rc = mca_pml_bfo_send_request_start_copy(sendreq, bml_btl, size);
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
size = eager_limit;
|
||||
if(OPAL_UNLIKELY(btl->btl_rndv_eager_limit < eager_limit))
|
||||
size = btl->btl_rndv_eager_limit;
|
||||
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {
|
||||
rc = mca_pml_bfo_send_request_start_buffered(sendreq, bml_btl, size);
|
||||
} else if
|
||||
(opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
|
||||
unsigned char *base;
|
||||
opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base );
|
||||
|
||||
if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_bfo_rdma_btls(
|
||||
sendreq->req_endpoint,
|
||||
base,
|
||||
sendreq->req_send.req_bytes_packed,
|
||||
sendreq->req_rdma))) {
|
||||
rc = mca_pml_bfo_send_request_start_rdma(sendreq, bml_btl,
|
||||
sendreq->req_send.req_bytes_packed);
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
||||
mca_pml_bfo_free_rdma_resources(sendreq);
|
||||
}
|
||||
} else {
|
||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size,
|
||||
MCA_PML_BFO_HDR_FLAGS_CONTIG);
|
||||
}
|
||||
} else {
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if (sendreq->req_send.req_base.req_convertor.flags & CONVERTOR_CUDA) {
|
||||
return mca_pml_bfo_send_request_start_cuda(sendreq, bml_btl, size);
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
rc = mca_pml_bfo_send_request_start_rndv(sendreq, bml_btl, size, 0);
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_pml_bfo_send_request_start( mca_pml_bfo_send_request_t* sendreq )
|
||||
{
|
||||
mca_pml_bfo_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm;
|
||||
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
|
||||
sendreq->req_send.req_base.req_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
size_t i;
|
||||
|
||||
if( OPAL_UNLIKELY(endpoint == NULL) ) {
|
||||
return OMPI_ERR_UNREACH;
|
||||
}
|
||||
|
||||
sendreq->req_endpoint = endpoint;
|
||||
sendreq->req_state = 0;
|
||||
sendreq->req_lock = 0;
|
||||
sendreq->req_pipeline_depth = 0;
|
||||
sendreq->req_bytes_delivered = 0;
|
||||
sendreq->req_pending = MCA_PML_BFO_SEND_PENDING_NONE;
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD_FETCH32(
|
||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
|
||||
#if PML_BFO
|
||||
sendreq->req_restartseq = 0; /* counts up restarts */
|
||||
sendreq->req_restart = 0; /* reset in case we restart again */
|
||||
sendreq->req_error = 0; /* clear error state */
|
||||
sendreq->req_events = 0; /* clear events, probably 0 anyways */
|
||||
#endif /* PML_BFO */
|
||||
|
||||
MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base );
|
||||
|
||||
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
int rc;
|
||||
|
||||
/* select a btl */
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
rc = mca_pml_bfo_send_request_start_btl(sendreq, bml_btl);
|
||||
if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) )
|
||||
return rc;
|
||||
}
|
||||
add_request_to_send_pending(sendreq, MCA_PML_BFO_SEND_PENDING_START, true);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initiate a put scheduled by the receiver.
|
||||
*/
|
||||
|
||||
void mca_pml_bfo_send_request_put( mca_pml_bfo_send_request_t* sendreq,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_bfo_rdma_hdr_t* hdr );
|
||||
|
||||
int mca_pml_bfo_send_request_put_frag(mca_pml_bfo_rdma_frag_t* frag);
|
||||
|
||||
/* This function tries to continue a sendreq that was stuck because of resource
 * unavailability. A sendreq may be added to the send_pending list if there is
 * no resource to send the initial packet or no resource to schedule data for
 * sending. The reason the sendreq was added to the list is stored inside the
 * sendreq struct, and the appropriate operation is retried when a resource
 * becomes available. The bml_btl passed to the function doesn't represent the
 * sendreq destination; it represents the BTL on which a resource was freed,
 * so only this BTL should be considered for sending packets */
|
||||
void mca_pml_bfo_send_request_process_pending(mca_bml_base_btl_t *bml_btl);
|
||||
|
||||
void mca_pml_bfo_send_request_copy_in_out(mca_pml_bfo_send_request_t *sendreq,
|
||||
uint64_t send_offset, uint64_t send_length);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OMPI_PML_BFO_SEND_REQUEST_H */
|
@ -1,148 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "pml_bfo.h"
|
||||
#include "pml_bfo_recvreq.h"
|
||||
#include "pml_bfo_sendreq.h"
|
||||
#include "ompi/memchecker.h"
|
||||
|
||||
|
||||
/**
 * Start (or restart) an array of persistent PML requests (MPI_Start /
 * MPI_Startall back-end).
 *
 * For each entry: NULL and non-PML requests are skipped.  An inactive,
 * pml-complete request is reused as-is.  An active request that has not
 * yet completed at the PML level is marked free-on-completion and a
 * brand-new request of the same kind is allocated in its place (the
 * caller's requests[i] slot is overwritten).  Finally the (old or new)
 * request is started via the send/recv start macros.
 *
 * @param count    number of entries in requests
 * @param requests array of persistent requests built by isend_init/irecv_init
 * @return OMPI_SUCCESS, or the first error encountered (OMPI_ERR_REQUEST
 *         for requests in an unexpected state/type)
 */
int mca_pml_bfo_start(size_t count, ompi_request_t** requests)
{
    int rc;
    size_t i;
    bool reuse_old_request = true;

    for(i=0; i<count; i++) {
        mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i];
        if(NULL == pml_request) {
            continue;
        }
        /* ignore requests not owned by the PML (e.g. generalized requests) */
        if (OMPI_REQUEST_PML != requests[i]->req_type) {
            continue;
        }

        /* If the persistent request is currently active - obtain the
         * request lock and verify the status is incomplete. if the
         * pml layer has not completed the request - mark the request
         * as free called - so that it will be freed when the request
         * completes - and create a new request.
         */

#if OPAL_ENABLE_MULTI_THREADS
        /* make prior completion updates from other threads visible
         * before inspecting req_state/req_pml_complete */
        opal_atomic_rmb();
#endif
        reuse_old_request = true;
        switch(pml_request->req_ompi.req_state) {
        case OMPI_REQUEST_INACTIVE:
            if(pml_request->req_pml_complete == true)
                break;          /* fully finished: safe to reuse in place */
            /* otherwise fall through */
        case OMPI_REQUEST_ACTIVE: {

            ompi_request_t *request;
            if (pml_request->req_pml_complete == false) {
                /* free request after it completes */
                pml_request->req_free_called = true;
            } else {
                /* can reuse the existing request */
                break;
            }

            reuse_old_request = false;
            /* allocate a new request of the same flavor, copying the
             * persistent arguments from the old one */
            switch(pml_request->req_type) {
            case MCA_PML_REQUEST_SEND: {
                mca_pml_base_send_mode_t sendmode =
                    ((mca_pml_base_send_request_t*)pml_request)->req_send_mode;
                rc = mca_pml_bfo_isend_init(
                    pml_request->req_addr,
                    pml_request->req_count,
                    pml_request->req_datatype,
                    pml_request->req_peer,
                    pml_request->req_tag,
                    sendmode,
                    pml_request->req_comm,
                    &request);
                break;
            }
            case MCA_PML_REQUEST_RECV:
                rc = mca_pml_bfo_irecv_init(
                    pml_request->req_addr,
                    pml_request->req_count,
                    pml_request->req_datatype,
                    pml_request->req_peer,
                    pml_request->req_tag,
                    pml_request->req_comm,
                    &request);
                break;
            default:
                rc = OMPI_ERR_REQUEST;
                break;
            }
            if(OMPI_SUCCESS != rc)
                return rc;
            /* hand the replacement back to the caller */
            pml_request = (mca_pml_base_request_t*)request;
            requests[i] = request;
            break;
        }
        default:
            return OMPI_ERR_REQUEST;
        }

        /* start the request */
        switch(pml_request->req_type) {
        case MCA_PML_REQUEST_SEND:
        {
            mca_pml_bfo_send_request_t* sendreq = (mca_pml_bfo_send_request_t*)pml_request;
            MEMCHECKER(
                memchecker_call(&opal_memchecker_base_isdefined,
                                pml_request->req_addr, pml_request->req_count,
                                pml_request->req_datatype);
            );
            if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) {
                size_t offset = 0;
                /**
                 * Reset the convertor in case we're dealing with the original
                 * request, which when completed does not reset the convertor.
                 */
                opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
                                             &offset );
            }
            MCA_PML_BFO_SEND_REQUEST_START(sendreq, rc);  /* sets rc */
            if(rc != OMPI_SUCCESS)
                return rc;
            break;
        }
        case MCA_PML_REQUEST_RECV:
        {
            mca_pml_bfo_recv_request_t* recvreq = (mca_pml_bfo_recv_request_t*)pml_request;
            MCA_PML_BFO_RECV_REQUEST_START(recvreq);
            break;
        }
        default:
            return OMPI_ERR_REQUEST;
        }
    }
    return OMPI_SUCCESS;
}
|
||||
|
@ -1 +0,0 @@
|
||||
DIRECT_CALL_HEADER="ompi/mca/pml/bfo/pml_bfo.h"
|
Загрузка…
Ссылка в новой задаче
Block a user