
Two major things in this commit:

* New "op" MPI layer framework
* Addition of the proposed MPI-2.2 function MPI_REDUCE_LOCAL

= Op framework =

Add new "op" framework in the ompi layer.  This framework replaces the
hard-coded MPI_Op back-end functions for (MPI_Op, MPI_Datatype) tuples
for pre-defined MPI_Ops, allowing components and modules to provide
the back-end functions.  The intent is that components can be written
to take advantage of hardware acceleration (GPU, FPGA, specialized CPU
instructions, etc.).  Similar to other frameworks, components are
intended to be able to discover at run-time if they can be used, and
if so, elect themselves to be selected (or disqualify themselves from
selection if they cannot run).  If specialized hardware is not
available, there is a default set of functions that will automatically
be used.
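
As a concrete illustration of the run-time election step, a component's
init/query function simply returns success or failure.  This is only a
sketch: everything named "fakegpu" below is made up, but the query
signature is the one used by this framework:

{{{
#include <stdbool.h>
#include <stdlib.h>
#include "ompi/constants.h"

/* Hypothetical "fakegpu" op component.  The framework calls this once
 * during startup; returning an error disqualifies the component from
 * selection, returning OMPI_SUCCESS keeps it eligible for per-MPI_Op
 * selection.  The "hardware probe" here is just an environment
 * variable check; a real component would open a device, query a
 * driver, etc. */
static int fakegpu_component_init_query(bool enable_progress_threads,
                                        bool enable_mpi_threads)
{
    if (NULL == getenv("FAKEGPU_PRESENT")) {
        return OMPI_ERROR;      /* no hardware: disqualify ourselves */
    }
    return OMPI_SUCCESS;        /* hardware found: stay in the running */
}
}}}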

This framework is ''not'' used for user-defined MPI_Ops.

The new op framework is similar to the existing coll framework, in
that the final set of function pointers that are used on any given
intrinsic MPI_Op can be a mixed bag of function pointers, potentially
coming from multiple different op modules.  This allows for hardware
that only supports some of the operations, not all of them (e.g., a
GPU that only supports single-precision operations).

All the hard-coded back-end MPI_Op functions for (MPI_Op,
MPI_Datatype) tuples still exist, but unlike coll, they're in the
framework base (vs. being in a separate "basic" component) and are
automatically used if no component is found at runtime that provides a
module with the necessary function pointers.
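
To give a feel for what a module-provided function looks like, here is
a sketch of an accelerated handler for a single (MPI_SUM, float) slot.
The "fakegpu" name is invented, but the argument list is the
framework's 2-buffer handler prototype.  A module that publishes only
this one pointer leaves every other (op, datatype) slot empty, and the
default base functions are used for those slots:

{{{
#include "ompi/mca/op/op.h"

/* Hypothetical accelerated SUM handler for the float datatype only.
 * A real component would hand this loop off to its device; the plain
 * C loop is a stand-in so that the sketch is self-contained. */
static void fakegpu_sum_float(void *in, void *out, int *count,
                              struct ompi_datatype_t **dtype,
                              struct ompi_op_base_module_1_0_0_t *module)
{
    int i;
    float *a = (float *) in;
    float *b = (float *) out;

    for (i = 0; i < *count; ++i) {
        b[i] += a[i];           /* out = in (op) out, element-wise */
    }
}
}}}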

There is an "example" op component that will hopefully be useful to
those writing meaningful op components.  It is currently
.ompi_ignore'd so that it doesn't impinge on other developers (it's
somewhat chatty in terms of opal_output() so that you can tell when
its functions have been invoked).  See the README file in the example
op component directory.  Developers of new op components are
encouraged to look at the following wiki pages:

  https://svn.open-mpi.org/trac/ompi/wiki/devel/Autogen
  https://svn.open-mpi.org/trac/ompi/wiki/devel/CreateComponent
  https://svn.open-mpi.org/trac/ompi/wiki/devel/CreateFramework

= MPI_REDUCE_LOCAL =

Part of the MPI-2.2 proposal listed here:

    https://svn.mpi-forum.org/trac/mpi-forum-web/ticket/24

is to add a new function named MPI_REDUCE_LOCAL.  It is very easy to
implement, so I added it (also because it makes testing the op
framework pretty easy -- you can do it in serial rather than via
parallel reductions).  There's even a man page!
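
For reference, a minimal serial test of the new function might look
like the following (the buffer contents are arbitrary; the prototype is
the one added in this commit):

{{{
#include <stdio.h>
#include "mpi.h"

int main(int argc, char *argv[])
{
    int in[4]    = { 1, 2, 3, 4 };
    int inout[4] = { 10, 20, 30, 40 };
    int i;

    MPI_Init(&argc, &argv);

    /* inout[i] = in[i] + inout[i], performed locally -- no
       communication, so this exercises the op framework without a
       parallel reduction. */
    MPI_Reduce_local(in, inout, 4, MPI_INT, MPI_SUM);

    for (i = 0; i < 4; ++i) {
        printf("inout[%d] = %d\n", i, inout[i]);    /* 11 22 33 44 */
    }

    MPI_Finalize();
    return 0;
}
}}}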

This commit was SVN r20280.
Author: Jeff Squyres
Date:   2009-01-14 23:44:31 +00:00
Parent: cfc400eb57
Commit: 4d8a187450
51 changed files with 4874 additions and 1348 deletions


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* $COPYRIGHT$
*
@ -1150,6 +1150,8 @@ OMPI_DECLSPEC int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int sou
int tag, MPI_Comm comm, MPI_Status *status);
OMPI_DECLSPEC int MPI_Reduce(void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
OMPI_DECLSPEC int MPI_Reduce_local(void *inbuf, void *inoutbuf, int count,
MPI_Datatype datatype, MPI_Op op);
OMPI_DECLSPEC int MPI_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
OMPI_DECLSPEC int MPI_Register_datarep(char *datarep,
@ -1660,6 +1662,8 @@ OMPI_DECLSPEC int PMPI_Recv(void *buf, int count, MPI_Datatype datatype, int so
int tag, MPI_Comm comm, MPI_Status *status);
OMPI_DECLSPEC int PMPI_Reduce(void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm);
OMPI_DECLSPEC int PMPI_Reduce_local(void *inbuf, void *inoutbuf, int count,
MPI_Datatype datatype, MPI_Op);
OMPI_DECLSPEC int PMPI_Reduce_scatter(void *sendbuf, void *recvbuf, int *recvcounts,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm);
OMPI_DECLSPEC int PMPI_Register_datarep(char *datarep,


@ -101,6 +101,7 @@ typedef int (*mca_coll_base_component_init_query_fn_t)
(bool enable_progress_threads, bool enable_mpi_threads);
/**
* Query whether a component is available for the given communicator
*

ompi/mca/op/Makefile.am

@ -0,0 +1,42 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
noinst_LTLIBRARIES = libmca_op.la
libmca_op_la_SOURCES =
# header setup
nobase_ompi_HEADERS =
# local files
headers = op.h
libmca_op_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
nobase_ompi_HEADERS += $(headers)
ompidir = $(includedir)/openmpi/ompi/mca/op
else
ompidir = $(includedir)
endif
include base/Makefile.include
distclean-local:
rm -f base/static-components.h


@ -0,0 +1,32 @@
# -*- makefile -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
dist_pkgdata_DATA = base/help-mca-op-base.txt
headers += \
base/base.h \
base/functions.h
libmca_op_la_SOURCES += \
base/op_base_close.c \
base/op_base_open.c \
base/op_base_find_available.c \
base/op_base_functions.c \
base/op_base_op_select.c

ompi/mca/op/base/base.h

@ -0,0 +1,140 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* This framework is for the selection and assignment of "op" modules
* to intrinsic MPI_Op objects. This framework is not used for
* user-defined MPI_Op objects.
*
* The main idea is to let intrinsic MPI_Ops be able to utilize
* functions from multiple op modules, based on the (datatype,
* operation) tuple. Hence, it is possible for specialized hardware
* to be utilized for datatypes and operations that are supported.
*/
#ifndef MCA_OP_BASE_H
#define MCA_OP_BASE_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/mca.h"
#include "ompi/mca/op/op.h"
BEGIN_C_DECLS
typedef struct ompi_op_base_selected_module_t {
opal_list_item_t super;
ompi_op_base_component_t *op_component;
ompi_op_base_module_t *op_module;
} ompi_op_base_selected_module_t;
/**
* Open the op framework.
*/
OMPI_DECLSPEC int ompi_op_base_open(void);
/**
* Find all available op components.
*/
OMPI_DECLSPEC int ompi_op_base_find_available(bool enable_progress_threads,
bool enable_mpi_threads);
/**
* Select an available component for a new intrinsic MPI_Op (this
* function is *not* used for user-defined MPI_Ops!).
*
* @param op MPI_Op that the component will be selected for.
* @param int fortran_handle Enum corresponding to the Fortran handle
* for the op that is being configured.
*
* @return OMPI_SUCCESS Upon success.
* @return OMPI_ERROR Upon failure.
*
* Note that the types of the parameters have "struct" in them (e.g.,
* ompi_op_t" vs. a plain "ompi_op_t") to avoid an include file loop.
* All similar types (e.g., "struct ompi_op_t *", "ompi_op_t *", and
* "MPI_Op") are all typedef'ed to be the same, so the fact that we
* use struct here in the prototype is ok.
*
* This function is invoked when a new MPI_Op is created and
* op components need to be selected for it.
*/
int ompi_op_base_op_select(struct ompi_op_t *op);
/**
* Finalize all op modules on a specific (intrinsic) MPI_Op.
*
* @param op The op that is being destroyed.
*
* @retval OMPI_SUCCESS Always.
*
* Note that the type of the parameter is only a "struct ompi_op_t"
* (vs. a plain "ompi_op_t") to avoid an include file loop. The types
* "struct ompi_op_t *", "ompi_op_t *", and "MPI_Op" are all
* typedef'ed to be the same, so the fact that we use struct here in
* the prototype is ok.
*
* This function is invoked near the beginning of the destruction of
* an op. It finalizes the op modules associated with the MPI_Op
* (e.g., allowing the component to clean up and free any resources
* allocated for that MPI_Op) by calling the destructor on each
* object.
*/
OMPI_DECLSPEC int ompi_op_base_op_unselect(struct ompi_op_t *op);
/**
* Close the op framework
*/
OMPI_DECLSPEC int ompi_op_base_close(void);
/**
* Verbose output stream for this framework
*/
OMPI_DECLSPEC extern int ompi_op_base_output;
/**
* List of all opened components; created when the op framework is
* initialized and destroyed when we reduce the list to all available
* op components.
*/
OPAL_DECLSPEC extern opal_list_t ompi_op_base_components_opened;
/**
* Indicator as to whether the list of opened op components is valid
* or not.
*/
OPAL_DECLSPEC extern bool ompi_op_base_components_opened_valid;
/**
* List of all available components.
*/
OMPI_DECLSPEC extern opal_list_t ompi_op_base_components_available;
/**
* Indicator as to whether the list of available op components is
* valid or not.
*/
OMPI_DECLSPEC extern bool ompi_op_base_components_available_valid;
END_C_DECLS
#endif /* MCA_OP_BASE_H */


@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -16,33 +17,33 @@
* $HEADER$
*/
#ifndef OMPI_OP_PREDEFINED_H
#define OMPI_OP_PREDEFINED_H
#ifndef OMPI_OP_BASE_FUNCTIONS_H
#define OMPI_OP_BASE_FUNCTIONS_H
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
/*
* Since we have so many of these, and they're all identical except
* for the name, use macros to prototype them.
*/
#define OMPI_OP_PROTO (void *in, void *out, int *count, MPI_Datatype *dtype)
#define OMPI_OP_PROTO (void *in, void *out, int *count, struct ompi_datatype_t **dtype, struct ompi_op_base_module_1_0_0_t *module)
/* C integer */
#define OMPI_OP_HANDLER_C_INTEGER_INTRINSIC(name) \
void ompi_mpi_op_##name##_unsigned_char OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_signed_char OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_int OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_long OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_short OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_unsigned_short OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_unsigned OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_unsigned_long OMPI_OP_PROTO;
void ompi_op_base_##name##_unsigned_char OMPI_OP_PROTO; \
void ompi_op_base_##name##_signed_char OMPI_OP_PROTO; \
void ompi_op_base_##name##_int OMPI_OP_PROTO; \
void ompi_op_base_##name##_long OMPI_OP_PROTO; \
void ompi_op_base_##name##_short OMPI_OP_PROTO; \
void ompi_op_base_##name##_unsigned_short OMPI_OP_PROTO; \
void ompi_op_base_##name##_unsigned OMPI_OP_PROTO; \
void ompi_op_base_##name##_unsigned_long OMPI_OP_PROTO;
#if HAVE_LONG_LONG
#define OMPI_OP_HANDLER_C_INTEGER_OPTIONAL(name) \
void ompi_mpi_op_##name##_long_long_int OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_long_long OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_unsigned_long_long OMPI_OP_PROTO;
void ompi_op_base_##name##_long_long_int OMPI_OP_PROTO; \
void ompi_op_base_##name##_long_long OMPI_OP_PROTO; \
void ompi_op_base_##name##_unsigned_long_long OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_C_INTEGER_OPTIONAL(name)
#endif
@ -53,34 +54,34 @@
/* Fortran integer */
#define OMPI_OP_HANDLER_FORTRAN_INTEGER_INTRINSIC(name) \
void ompi_mpi_op_##name##_fortran_integer OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_integer OMPI_OP_PROTO;
#if OMPI_HAVE_FORTRAN_INTEGER1
#define OMPI_OP_HANDLER_FORTRAN_INTEGER1(name) \
void ompi_mpi_op_##name##_fortran_integer1 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_integer1 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FORTRAN_INTEGER1(name)
#endif
#if OMPI_HAVE_FORTRAN_INTEGER2
#define OMPI_OP_HANDLER_FORTRAN_INTEGER2(name) \
void ompi_mpi_op_##name##_fortran_integer2 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_integer2 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FORTRAN_INTEGER2(name)
#endif
#if OMPI_HAVE_FORTRAN_INTEGER4
#define OMPI_OP_HANDLER_FORTRAN_INTEGER4(name) \
void ompi_mpi_op_##name##_fortran_integer4 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_integer4 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FORTRAN_INTEGER4(name)
#endif
#if OMPI_HAVE_FORTRAN_INTEGER8
#define OMPI_OP_HANDLER_FORTRAN_INTEGER8(name) \
void ompi_mpi_op_##name##_fortran_integer8 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_integer8 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FORTRAN_INTEGER8(name)
#endif
#if OMPI_HAVE_FORTRAN_INTEGER16
#define OMPI_OP_HANDLER_FORTRAN_INTEGER16(name) \
void ompi_mpi_op_##name##_fortran_integer16 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_integer16 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FORTRAN_INTEGER16(name)
#endif
@ -95,32 +96,32 @@
/* Floating point */
#define OMPI_OP_HANDLER_FLOATING_POINT_INTRINSIC(name) \
void ompi_mpi_op_##name##_float OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_double OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_fortran_real OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_fortran_double_precision OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_long_double OMPI_OP_PROTO;
void ompi_op_base_##name##_float OMPI_OP_PROTO; \
void ompi_op_base_##name##_double OMPI_OP_PROTO; \
void ompi_op_base_##name##_fortran_real OMPI_OP_PROTO; \
void ompi_op_base_##name##_fortran_double_precision OMPI_OP_PROTO; \
void ompi_op_base_##name##_long_double OMPI_OP_PROTO;
#if OMPI_HAVE_FORTRAN_REAL2
#define OMPI_OP_HANDLER_FLOATING_POINT_REAL2(name) \
void ompi_mpi_op_##name##_fortran_real2 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_real2 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FLOATING_POINT_REAL2(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL4
#define OMPI_OP_HANDLER_FLOATING_POINT_REAL4(name) \
void ompi_mpi_op_##name##_fortran_real4 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_real4 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FLOATING_POINT_REAL4(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL8
#define OMPI_OP_HANDLER_FLOATING_POINT_REAL8(name) \
void ompi_mpi_op_##name##_fortran_real8 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_real8 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FLOATING_POINT_REAL8(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL16
#define OMPI_OP_HANDLER_FLOATING_POINT_REAL16(name) \
void ompi_mpi_op_##name##_fortran_real16 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_real16 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_FLOATING_POINT_REAL16(name)
#endif
@ -133,38 +134,38 @@
/* Logical */
#define OMPI_OP_HANDLER_LOGICAL(name) \
void ompi_mpi_op_##name##_fortran_logical OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_bool OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_logical OMPI_OP_PROTO; \
void ompi_op_base_##name##_bool OMPI_OP_PROTO;
/* Complex */
#if OMPI_HAVE_FORTRAN_REAL
#define OMPI_OP_HANDLER_COMPLEX_INTRINSIC(name) \
void ompi_mpi_op_##name##_fortran_complex OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_complex OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_COMPLEX_INTRINSIC(name)
#endif
#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION
#define OMPI_OP_HANDLER_DOUBLE_COMPLEX_INTRINSIC(name) \
void ompi_mpi_op_##name##_fortran_double_complex OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_double_complex OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_DOUBLE_COMPLEX_INTRINSIC(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL4
#define OMPI_OP_HANDLER_COMPLEX8(name) \
void ompi_mpi_op_##name##_fortran_complex8 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_complex8 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_COMPLEX8(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL8
#define OMPI_OP_HANDLER_COMPLEX16(name) \
void ompi_mpi_op_##name##_fortran_complex16 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_complex16 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_COMPLEX16(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL16
#define OMPI_OP_HANDLER_COMPLEX32(name) \
void ompi_mpi_op_##name##_fortran_complex32 OMPI_OP_PROTO;
void ompi_op_base_##name##_fortran_complex32 OMPI_OP_PROTO;
#else
#define OMPI_OP_HANDLER_COMPLEX32(name)
#endif
@ -178,20 +179,20 @@
/* Byte */
#define OMPI_OP_HANDLER_BYTE(name) \
void ompi_mpi_op_##name##_byte OMPI_OP_PROTO;
void ompi_op_base_##name##_byte OMPI_OP_PROTO;
/* "2 type" */
#define OMPI_OP_HANDLER_2TYPE(name) \
void ompi_mpi_op_##name##_2real OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_2double_precision OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_2integer OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_float_int OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_double_int OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_long_int OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_2int OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_short_int OMPI_OP_PROTO; \
void ompi_mpi_op_##name##_long_double_int OMPI_OP_PROTO;
void ompi_op_base_##name##_2real OMPI_OP_PROTO; \
void ompi_op_base_##name##_2double_precision OMPI_OP_PROTO; \
void ompi_op_base_##name##_2integer OMPI_OP_PROTO; \
void ompi_op_base_##name##_float_int OMPI_OP_PROTO; \
void ompi_op_base_##name##_double_int OMPI_OP_PROTO; \
void ompi_op_base_##name##_long_int OMPI_OP_PROTO; \
void ompi_op_base_##name##_2int OMPI_OP_PROTO; \
void ompi_op_base_##name##_short_int OMPI_OP_PROTO; \
void ompi_op_base_##name##_long_double_int OMPI_OP_PROTO;
BEGIN_C_DECLS
@ -279,24 +280,25 @@ BEGIN_C_DECLS
*/
#define OMPI_OP_PROTO_3BUF \
( void * restrict in1, void * restrict in2, void * restrict out, \
int *count, MPI_Datatype *dtype)
int *count, struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module)
/* C integer */
#define OMPI_OP_3BUFF_HANDLER_C_INTEGER_INTRINSIC(name) \
void ompi_mpi_op_three_buff_##name##_unsigned_char OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_signed_char OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_int OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_long OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_short OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_unsigned_short OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_unsigned OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_unsigned_long OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_unsigned_char OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_signed_char OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_int OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_long OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_short OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_unsigned_short OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_unsigned OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_unsigned_long OMPI_OP_PROTO_3BUF;
#if HAVE_LONG_LONG
#define OMPI_OP_3BUFF_HANDLER_C_INTEGER_OPTIONAL(name) \
void ompi_mpi_op_three_buff_##name##_long_long_int OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_long_long OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_unsigned_long_long OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_long_long_int OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_long_long OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_unsigned_long_long OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_C_INTEGER_OPTIONAL(name)
#endif
@ -307,34 +309,34 @@ BEGIN_C_DECLS
/* Fortran integer */
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER_INTRINSIC(name) \
void ompi_mpi_op_three_buff_##name##_fortran_integer OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_integer OMPI_OP_PROTO_3BUF;
#if OMPI_HAVE_FORTRAN_INTEGER1
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER1(name) \
void ompi_mpi_op_three_buff_##name##_fortran_integer1 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_integer1 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER1(name)
#endif
#if OMPI_HAVE_FORTRAN_INTEGER2
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER2(name) \
void ompi_mpi_op_three_buff_##name##_fortran_integer2 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_integer2 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER2(name)
#endif
#if OMPI_HAVE_FORTRAN_INTEGER4
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER4(name) \
void ompi_mpi_op_three_buff_##name##_fortran_integer4 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_integer4 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER4(name)
#endif
#if OMPI_HAVE_FORTRAN_INTEGER8
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER8(name) \
void ompi_mpi_op_three_buff_##name##_fortran_integer8 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_integer8 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER8(name)
#endif
#if OMPI_HAVE_FORTRAN_INTEGER16
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER16(name) \
void ompi_mpi_op_three_buff_##name##_fortran_integer16 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_integer16 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER16(name)
#endif
@ -349,32 +351,32 @@ BEGIN_C_DECLS
/* Floating point */
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_INTRINSIC(name) \
void ompi_mpi_op_three_buff_##name##_float OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_double OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_fortran_real OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_fortran_double_precision OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_long_double OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_float OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_double OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_fortran_real OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_fortran_double_precision OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_long_double OMPI_OP_PROTO_3BUF;
#if OMPI_HAVE_FORTRAN_REAL2
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL2(name) \
void ompi_mpi_op_three_buff_##name##_fortran_real2 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_real2 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL2(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL4
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL4(name) \
void ompi_mpi_op_three_buff_##name##_fortran_real4 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_real4 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL4(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL8
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL8(name) \
void ompi_mpi_op_three_buff_##name##_fortran_real8 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_real8 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL8(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL16
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL16(name) \
void ompi_mpi_op_three_buff_##name##_fortran_real16 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_real16 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL16(name)
#endif
@ -387,38 +389,38 @@ BEGIN_C_DECLS
/* Logical */
#define OMPI_OP_3BUFF_HANDLER_LOGICAL(name) \
void ompi_mpi_op_three_buff_##name##_fortran_logical OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_bool OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_logical OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_bool OMPI_OP_PROTO_3BUF;
/* Complex */
#if OMPI_HAVE_FORTRAN_REAL
#define OMPI_OP_3BUFF_HANDLER_COMPLEX_INTRINSIC(name) \
void ompi_mpi_op_three_buff_##name##_fortran_complex OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_complex OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_COMPLEX_INTRINSIC(name)
#endif
#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION
#define OMPI_OP_3BUFF_HANDLER_DOUBLE_COMPLEX_INTRINSIC(name) \
void ompi_mpi_op_three_buff_##name##_fortran_double_complex OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_double_complex OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_DOUBLE_COMPLEX_INTRINSIC(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL4
#define OMPI_OP_3BUFF_HANDLER_COMPLEX8(name) \
void ompi_mpi_op_three_buff_##name##_fortran_complex8 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_complex8 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_COMPLEX8(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL8
#define OMPI_OP_3BUFF_HANDLER_COMPLEX16(name) \
void ompi_mpi_op_three_buff_##name##_fortran_complex16 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_complex16 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_COMPLEX16(name)
#endif
#if OMPI_HAVE_FORTRAN_REAL16
#define OMPI_OP_3BUFF_HANDLER_COMPLEX32(name) \
void ompi_mpi_op_three_buff_##name##_fortran_complex32 OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_fortran_complex32 OMPI_OP_PROTO_3BUF;
#else
#define OMPI_OP_3BUFF_HANDLER_COMPLEX32(name)
#endif
@ -432,20 +434,20 @@ BEGIN_C_DECLS
/* Byte */
#define OMPI_OP_3BUFF_HANDLER_BYTE(name) \
void ompi_mpi_op_three_buff_##name##_byte OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_byte OMPI_OP_PROTO_3BUF;
/* "2 type" */
#define OMPI_OP_3BUFF_HANDLER_2TYPE(name) \
void ompi_mpi_op_three_buff_##name##_2real OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_2double_precision OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_2integer OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_float_int OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_double_int OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_long_int OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_2int OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_short_int OMPI_OP_PROTO_3BUF; \
void ompi_mpi_op_three_buff_##name##_long_double_int OMPI_OP_PROTO_3BUF;
void ompi_op_base_3buff_##name##_2real OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_2double_precision OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_2integer OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_float_int OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_double_int OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_long_int OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_2int OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_short_int OMPI_OP_PROTO_3BUF; \
void ompi_op_base_3buff_##name##_long_double_int OMPI_OP_PROTO_3BUF;
/**
* Handler functions for MPI_MAX
@ -526,6 +528,15 @@ BEGIN_C_DECLS
*/
OMPI_OP_3BUFF_HANDLER_2TYPE(minloc)
/**
* Globals holding all the "base" function pointers, indexed by op and
* datatype.
*/
OMPI_DECLSPEC extern ompi_op_base_handler_fn_t
ompi_op_base_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX];
OMPI_DECLSPEC extern ompi_op_base_3buff_handler_fn_t
ompi_op_base_3buff_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX];
END_C_DECLS
#endif /* OMPI_OP_PREDEFINED_H */
#endif /* OMPI_OP_BASE_FUNCTIONS_H */


@ -0,0 +1,37 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI MCA op-specific
# error messages.
#
[op-select:none-available]
Although some op components are available on your system, none of
them said that they could be used for a new MPI_Op ("%s").
This is extremely unusual -- a basic function should always be able to
be selected for any MPI_Op. As such, this likely means that something
else is wrong with either your Open MPI installation or your system.
#
[op-unselect:failed-finalize]
An op module failed to finalize properly when an MPI_Op that was
using it was destroyed.
This is somewhat unusual: the module itself may be at fault, or this
may be a symptom of another issue (e.g., a memory problem).
#


@ -0,0 +1,52 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "ompi/constants.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
int ompi_op_base_close(void)
{
/* Close all components that are still open. This may be the
* opened list (if we're in ompi_info), or it may be the available
* list (if we're anywhere else). */
if (ompi_op_base_components_opened_valid) {
mca_base_components_close(ompi_op_base_output,
&ompi_op_base_components_opened, NULL);
OBJ_DESTRUCT(&ompi_op_base_components_opened);
ompi_op_base_components_opened_valid = false;
} else if (ompi_op_base_components_available_valid) {
mca_base_components_close(ompi_op_base_output,
&ompi_op_base_components_available,
NULL);
OBJ_DESTRUCT(&ompi_op_base_components_available);
ompi_op_base_components_available_valid = false;
}
/* All done */
return OMPI_SUCCESS;
}


@ -0,0 +1,211 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mpi.h"
#include "opal/class/opal_list.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_component_repository.h"
#include "orte/util/show_help.h"
#include "ompi/constants.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
/*
* Global variables
*/
bool ompi_op_base_components_available_valid = false;
opal_list_t ompi_op_base_components_available;
/*
* Private functions
*/
static int init_query(const mca_base_component_t * ls,
mca_base_component_priority_list_item_t * entry,
bool enable_progress_threads,
bool enable_mpi_threads);
static int init_query_1_0_0(const mca_base_component_t * ls,
mca_base_component_priority_list_item_t *
entry, bool enable_progress_threads,
bool enable_mpi_threads);
/*
* Scan down the list of successfully opened components and query each
* of them (the opened list will be one or more components. If the
* user requested a specific set of components, they will be the only
* components in the opened list). Create and populate the available
* list of all components who indicate that they want to be considered
* for selection. Close all components who do not want to be
* considered for selection. Finally, destroy the "opened" list,
* because only the "available" list is relevant now.
*/
int ompi_op_base_find_available(bool enable_progress_threads,
bool enable_mpi_threads)
{
bool found = false;
mca_base_component_priority_list_item_t *entry;
opal_list_item_t *p;
const mca_base_component_t *component;
/* Initialize the list */
OBJ_CONSTRUCT(&ompi_op_base_components_available, opal_list_t);
ompi_op_base_components_available_valid = true;
/* The list of components that we should check has already been
established in ompi_op_base_open. */
for (found = false,
p = opal_list_remove_first(&ompi_op_base_components_opened);
p != NULL;
p = opal_list_remove_first(&ompi_op_base_components_opened)) {
component = ((mca_base_component_list_item_t *) p)->cli_component;
/* Call a subroutine to do the work, because the component may
represent different versions of the op MCA. */
entry = OBJ_NEW(mca_base_component_priority_list_item_t);
entry->super.cli_component = component;
entry->cpli_priority = 0;
if (OMPI_SUCCESS == init_query(component, entry,
enable_progress_threads,
enable_mpi_threads)) {
opal_list_append(&ompi_op_base_components_available,
(opal_list_item_t *) entry);
found = true;
} else {
/* If the component doesn't want to run, then close it.
It's already had its close() method invoked; now close
it out of the DSO repository (if it's there). */
mca_base_component_repository_release(component);
OBJ_RELEASE(entry);
}
/* Free the entry from the "opened" list */
OBJ_RELEASE(p);
}
/* The opened list is now no longer useful and we can free it */
OBJ_DESTRUCT(&ompi_op_base_components_opened);
ompi_op_base_components_opened_valid = false;
/* If we have no op components available, it's an error. Thanks
for playing! */
if (!found) {
/* Need to free all items in the list */
OBJ_DESTRUCT(&ompi_op_base_components_available);
ompi_op_base_components_available_valid = false;
opal_output_verbose(10, ompi_op_base_output,
"op:find_available: no op components available!");
orte_show_help("help-mca-base", "find-available:none-found", true,
"op");
return OMPI_ERROR;
}
/* All done */
return OMPI_SUCCESS;
}
/*
* Query a component, see if it wants to run at all. If it does, save
* some information. If it doesn't, close it.
*/
static int init_query(const mca_base_component_t * c,
mca_base_component_priority_list_item_t * entry,
bool enable_progress_threads, bool enable_mpi_threads)
{
int ret;
opal_output_verbose(10, ompi_op_base_output,
"op:find_available: querying op component %s",
c->mca_component_name);
/* This component has already been successfully opened. So now
query it. */
if (1 == c->mca_type_major_version &&
0 == c->mca_type_minor_version &&
0 == c->mca_type_release_version) {
ret = init_query_1_0_0(c, entry, enable_progress_threads,
enable_mpi_threads);
} else {
/* Unrecognized op API version */
opal_output_verbose(10, ompi_op_base_output,
"op:find_available: unrecognized op API version (%d.%d.%d, ignored)",
c->mca_type_major_version,
c->mca_type_minor_version,
c->mca_type_release_version);
return OMPI_ERROR;
}
/* Query done -- look at the return value to see what happened */
if (OMPI_SUCCESS != ret) {
opal_output_verbose(10, ompi_op_base_output,
"op:find_available: op component %s is not available",
c->mca_component_name);
if (NULL != c->mca_close_component) {
c->mca_close_component();
}
} else {
opal_output_verbose(10, ompi_op_base_output,
"op:find_available: op component %s is available",
c->mca_component_name);
}
/* All done */
return ret;
}
/*
* Query a specific component, op v1.0.0
*/
static int init_query_1_0_0(const mca_base_component_t * component,
mca_base_component_priority_list_item_t * entry,
bool enable_progress_threads,
bool enable_mpi_threads)
{
ompi_op_base_component_1_0_0_t *op =
(ompi_op_base_component_1_0_0_t *) component;
return op->opc_init_query(enable_progress_threads,
enable_mpi_threads);
}


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -23,8 +23,8 @@
#include <sys/types.h>
#endif
#include "ompi/op/op.h"
#include "ompi/op/op_predefined.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/functions.h"
/*
@ -35,8 +35,9 @@
* This macro is for (out op in).
*/
#define OP_FUNC(name, type_name, type, op) \
void ompi_mpi_op_##name##_##type_name(void *in, void *out, int *count, \
MPI_Datatype *dtype) \
void ompi_op_base_##name##_##type_name(void *in, void *out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
int i; \
type *a = (type *) in; \
@ -47,8 +48,9 @@
}
#define COMPLEX_OP_FUNC_SUM(type_name, type) \
void ompi_mpi_op_sum_##type_name(void *in, void *out, int *count, \
MPI_Datatype *dtype) \
void ompi_op_base_sum_##type_name(void *in, void *out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module)\
{ \
int i; \
type *a = (type *) in; \
@ -60,8 +62,9 @@
}
#define COMPLEX_OP_FUNC_PROD(type_name, type) \
void ompi_mpi_op_prod_##type_name(void *in, void *out, int *count, \
MPI_Datatype *dtype) \
void ompi_op_base_prod_##type_name(void *in, void *out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module)\
{ \
int i; \
type *a = (type *) in; \
@ -83,8 +86,9 @@
* This macro is for (out = op(out, in))
*/
#define FUNC_FUNC(name, type_name, type) \
void ompi_mpi_op_##name##_##type_name(void *in, void *out, int *count, \
MPI_Datatype *dtype) \
void ompi_op_base_##name##_##type_name(void *in, void *out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module)\
{ \
int i; \
type *a = (type *) in; \
@ -110,8 +114,9 @@
} ompi_op_predefined_##type_name##_t;
#define LOC_FUNC(name, type_name, op) \
void ompi_mpi_op_##name##_##type_name(void *in, void *out, int *count, \
MPI_Datatype *dtype) \
void ompi_op_base_##name##_##type_name(void *in, void *out, int *count, \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module)\
{ \
int i; \
ompi_op_predefined_##type_name##_t *a = (ompi_op_predefined_##type_name##_t*) in; \
@ -680,9 +685,10 @@ LOC_FUNC(minloc, long_double_int, <)
* routines, needed for some optimizations.
*/
#define OP_FUNC_3BUF(name, type_name, type, op) \
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
void ompi_op_base_3buff_##name##_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
int i; \
type *a1 = (type *) in1; \
@ -694,9 +700,10 @@ LOC_FUNC(minloc, long_double_int, <)
}
#define COMPLEX_OP_FUNC_SUM_3BUF(type_name, type) \
void ompi_mpi_op_three_buff_sum_##type_name(void * restrict in1, \
void ompi_op_base_3buff_sum_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
int i; \
type *a1 = (type *) in1; \
@ -709,9 +716,10 @@ LOC_FUNC(minloc, long_double_int, <)
}
#define COMPLEX_OP_FUNC_PROD_3BUF(type_name, type) \
void ompi_mpi_op_three_buff_prod_##type_name(void * restrict in1, \
void ompi_op_base_3buff_prod_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
int i; \
type *a1 = (type *) in1; \
@ -732,9 +740,10 @@ LOC_FUNC(minloc, long_double_int, <)
* This macro is for (out = op(in1, in2))
*/
#define FUNC_FUNC_3BUF(name, type_name, type) \
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
void ompi_op_base_3buff_##name##_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
int i; \
type *a1 = (type *) in1; \
@ -764,9 +773,10 @@ LOC_FUNC(minloc, long_double_int, <)
*/
#define LOC_FUNC_3BUF(name, type_name, op) \
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
void ompi_op_base_3buff_##name##_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
struct ompi_datatype_t **dtype, \
struct ompi_op_base_module_1_0_0_t *module) \
{ \
int i; \
ompi_op_predefined_##type_name##_t *a1 = (ompi_op_predefined_##type_name##_t*) in1; \
@ -1335,3 +1345,734 @@ LOC_FUNC_3BUF(minloc, short_int, <)
#if HAVE_LONG_DOUBLE
LOC_FUNC_3BUF(minloc, long_double_int, <)
#endif
/*
* Helpful defines, because there's soooo many names!
*
* **NOTE** These #define's are strictly ordered! A series of macros
* are built up to assemble a list of function names (or NULLs) that
* are put into the intrinsic ompi_op_t's in the middle of this file.
* The order of these function names is critical, and must be the same
* as the OMPI_OP_BASE_TYPE_* enums in ompi/mca/op/op.h (i.e., the
* enum's starting with OMPI_OP_BASE_TYPE_UNSIGNED_CHAR).
*/
/** C integer ***********************************************************/
#ifdef HAVE_LONG_LONG
#define C_INTEGER_LONG_LONG(name) \
ompi_op_base_##name##_long_long_int, /* OMPI_OP_BASE_TYPE_LONG_LONG_INT */ \
ompi_op_base_##name##_unsigned_long_long /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG_LONG */
#define C_INTEGER_LONG_LONG_3BUFF(name) \
ompi_op_base_3buff_##name##_long_long_int, /* OMPI_OP_BASE_TYPE_LONG_LONG_INT */ \
ompi_op_base_3buff_##name##_unsigned_long_long /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG_LONG */
#else
#define C_INTEGER_LONG_LONG(name) \
NULL, /* OMPI_OP_BASE_TYPE_LONG_LONG_INT */ \
NULL /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG_LONG */
#define C_INTEGER_LONG_LONG_3BUFF(name) \
NULL, /* OMPI_OP_BASE_TYPE_LONG_LONG_INT */ \
NULL /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG_LONG */
#endif
#define C_INTEGER(name) \
ompi_op_base_##name##_unsigned_char, /* OMPI_OP_BASE_TYPE_UNSIGNED_CHAR */ \
ompi_op_base_##name##_signed_char, /* OMPI_OP_BASE_TYPE_SIGNED_CHAR */ \
ompi_op_base_##name##_int, /* OMPI_OP_BASE_TYPE_INT */ \
ompi_op_base_##name##_long, /* OMPI_OP_BASE_TYPE_LONG */ \
ompi_op_base_##name##_short, /* OMPI_OP_BASE_TYPE_SHORT */ \
ompi_op_base_##name##_unsigned_short, /* OMPI_OP_BASE_TYPE_UNSIGNED_SHORT */ \
ompi_op_base_##name##_unsigned, /* OMPI_OP_BASE_TYPE_UNSIGNED */ \
ompi_op_base_##name##_unsigned_long, /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG */ \
C_INTEGER_LONG_LONG(name)
#define C_INTEGER_3BUFF(name) \
ompi_op_base_3buff_##name##_unsigned_char, /* OMPI_OP_BASE_TYPE_UNSIGNED_CHAR */ \
ompi_op_base_3buff_##name##_signed_char, /* OMPI_OP_BASE_TYPE_SIGNED_CHAR */ \
ompi_op_base_3buff_##name##_int, /* OMPI_OP_BASE_TYPE_INT */ \
ompi_op_base_3buff_##name##_long, /* OMPI_OP_BASE_TYPE_LONG */ \
ompi_op_base_3buff_##name##_short, /* OMPI_OP_BASE_TYPE_SHORT */ \
ompi_op_base_3buff_##name##_unsigned_short, /* OMPI_OP_BASE_TYPE_UNSIGNED_SHORT */ \
ompi_op_base_3buff_##name##_unsigned, /* OMPI_OP_BASE_TYPE_UNSIGNED */ \
ompi_op_base_3buff_##name##_unsigned_long, /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG */ \
C_INTEGER_LONG_LONG_3BUFF(name)
#define C_INTEGER_NULL \
NULL, /* OMPI_OP_BASE_TYPE_UNSIGNED_CHAR */ \
NULL, /* OMPI_OP_BASE_TYPE_SIGNED_CHAR */ \
NULL, /* OMPI_OP_BASE_TYPE_INT */ \
NULL, /* OMPI_OP_BASE_TYPE_LONG */ \
NULL, /* OMPI_OP_BASE_TYPE_SHORT */ \
NULL, /* OMPI_OP_BASE_TYPE_UNSIGNED_SHORT */ \
NULL, /* OMPI_OP_BASE_TYPE_UNSIGNED */ \
NULL, /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG */ \
NULL, /* OMPI_OP_BASE_TYPE_LONG_LONG_INT */ \
NULL /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG_LONG */
#define C_INTEGER_NULL_3BUFF \
NULL, /* OMPI_OP_BASE_TYPE_UNSIGNED_CHAR */ \
NULL, /* OMPI_OP_BASE_TYPE_SIGNED_CHAR */ \
NULL, /* OMPI_OP_BASE_TYPE_INT */ \
NULL, /* OMPI_OP_BASE_TYPE_LONG */ \
NULL, /* OMPI_OP_BASE_TYPE_SHORT */ \
NULL, /* OMPI_OP_BASE_TYPE_UNSIGNED_SHORT */ \
NULL, /* OMPI_OP_BASE_TYPE_UNSIGNED */ \
NULL, /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG */ \
NULL, /* OMPI_OP_BASE_TYPE_LONG_LONG_INT */ \
NULL /* OMPI_OP_BASE_TYPE_UNSIGNED_LONG_LONG */
/** All the Fortran integers ********************************************/
#if OMPI_HAVE_FORTRAN_INTEGER
#define FORTRAN_INTEGER_PLAIN(name) ompi_op_base_##name##_fortran_integer
#define FORTRAN_INTEGER_PLAIN_3BUFF(name) ompi_op_base_3buff_##name##_fortran_integer
#else
#define FORTRAN_INTEGER_PLAIN(name) NULL
#define FORTRAN_INTEGER_PLAIN_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_INTEGER1
#define FORTRAN_INTEGER1(name) ompi_op_base_##name##_fortran_integer1
#define FORTRAN_INTEGER1_3BUFF(name) ompi_op_base_3buff_##name##_fortran_integer1
#else
#define FORTRAN_INTEGER1(name) NULL
#define FORTRAN_INTEGER1_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_INTEGER2
#define FORTRAN_INTEGER2(name) ompi_op_base_##name##_fortran_integer2
#define FORTRAN_INTEGER2_3BUFF(name) ompi_op_base_3buff_##name##_fortran_integer2
#else
#define FORTRAN_INTEGER2(name) NULL
#define FORTRAN_INTEGER2_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_INTEGER4
#define FORTRAN_INTEGER4(name) ompi_op_base_##name##_fortran_integer4
#define FORTRAN_INTEGER4_3BUFF(name) ompi_op_base_3buff_##name##_fortran_integer4
#else
#define FORTRAN_INTEGER4(name) NULL
#define FORTRAN_INTEGER4_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_INTEGER8
#define FORTRAN_INTEGER8(name) ompi_op_base_##name##_fortran_integer8
#define FORTRAN_INTEGER8_3BUFF(name) ompi_op_base_3buff_##name##_fortran_integer8
#else
#define FORTRAN_INTEGER8(name) NULL
#define FORTRAN_INTEGER8_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_INTEGER16
#define FORTRAN_INTEGER16(name) ompi_op_base_##name##_fortran_integer16
#define FORTRAN_INTEGER16_3BUFF(name) ompi_op_base_3buff_##name##_fortran_integer16
#else
#define FORTRAN_INTEGER16(name) NULL
#define FORTRAN_INTEGER16_3BUFF(name) NULL
#endif
#define FORTRAN_INTEGER(name) \
FORTRAN_INTEGER_PLAIN(name), /* OMPI_OP_BASE_TYPE_INTEGER */ \
FORTRAN_INTEGER1(name), /* OMPI_OP_BASE_TYPE_INTEGER1 */ \
FORTRAN_INTEGER2(name), /* OMPI_OP_BASE_TYPE_INTEGER2 */ \
FORTRAN_INTEGER4(name), /* OMPI_OP_BASE_TYPE_INTEGER4 */ \
FORTRAN_INTEGER8(name), /* OMPI_OP_BASE_TYPE_INTEGER8 */ \
FORTRAN_INTEGER16(name) /* OMPI_OP_BASE_TYPE_INTEGER16 */
#define FORTRAN_INTEGER_3BUFF(name) \
FORTRAN_INTEGER_PLAIN_3BUFF(name), /* OMPI_OP_BASE_TYPE_INTEGER */ \
FORTRAN_INTEGER1_3BUFF(name), /* OMPI_OP_BASE_TYPE_INTEGER1 */ \
FORTRAN_INTEGER2_3BUFF(name), /* OMPI_OP_BASE_TYPE_INTEGER2 */ \
FORTRAN_INTEGER4_3BUFF(name), /* OMPI_OP_BASE_TYPE_INTEGER4 */ \
FORTRAN_INTEGER8_3BUFF(name), /* OMPI_OP_BASE_TYPE_INTEGER8 */ \
FORTRAN_INTEGER16_3BUFF(name) /* OMPI_OP_BASE_TYPE_INTEGER16 */
#define FORTRAN_INTEGER_NULL \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER */ \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER1 */ \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER2 */ \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER4 */ \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER8 */ \
NULL /* OMPI_OP_BASE_TYPE_INTEGER16 */
#define FORTRAN_INTEGER_NULL_3BUFF \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER */ \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER1 */ \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER2 */ \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER4 */ \
NULL, /* OMPI_OP_BASE_TYPE_INTEGER8 */ \
NULL /* OMPI_OP_BASE_TYPE_INTEGER16 */
/** All the Fortran reals ***********************************************/
#if OMPI_HAVE_FORTRAN_REAL
#define FLOATING_POINT_FORTRAN_REAL_PLAIN(name) ompi_op_base_##name##_fortran_real
#define FLOATING_POINT_FORTRAN_REAL_PLAIN_3BUFF(name) ompi_op_base_3buff_##name##_fortran_real
#else
#define FLOATING_POINT_FORTRAN_REAL_PLAIN(name) NULL
#define FLOATING_POINT_FORTRAN_REAL_PLAIN_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_REAL2
#define FLOATING_POINT_FORTRAN_REAL2(name) ompi_op_base_##name##_fortran_real2
#define FLOATING_POINT_FORTRAN_REAL2_3BUFF(name) ompi_op_base_3buff_##name##_fortran_real2
#else
#define FLOATING_POINT_FORTRAN_REAL2(name) NULL
#define FLOATING_POINT_FORTRAN_REAL2_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_REAL4
#define FLOATING_POINT_FORTRAN_REAL4(name) ompi_op_base_##name##_fortran_real4
#define FLOATING_POINT_FORTRAN_REAL4_3BUFF(name) ompi_op_base_3buff_##name##_fortran_real4
#else
#define FLOATING_POINT_FORTRAN_REAL4(name) NULL
#define FLOATING_POINT_FORTRAN_REAL4_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_REAL8
#define FLOATING_POINT_FORTRAN_REAL8(name) ompi_op_base_##name##_fortran_real8
#define FLOATING_POINT_FORTRAN_REAL8_3BUFF(name) ompi_op_base_3buff_##name##_fortran_real8
#else
#define FLOATING_POINT_FORTRAN_REAL8(name) NULL
#define FLOATING_POINT_FORTRAN_REAL8_3BUFF(name) NULL
#endif
/* If:
- we have fortran REAL*16, *and*
- fortran REAL*16 matches the bit representation of the
corresponding C type
Only then do we put in function pointers for REAL*16 reductions.
Otherwise, just put in NULL. */
#if OMPI_HAVE_FORTRAN_REAL16 && OMPI_REAL16_MATCHES_C
#define FLOATING_POINT_FORTRAN_REAL16(name) ompi_op_base_##name##_fortran_real16
#define FLOATING_POINT_FORTRAN_REAL16_3BUFF(name) ompi_op_base_3buff_##name##_fortran_real16
#else
#define FLOATING_POINT_FORTRAN_REAL16(name) NULL
#define FLOATING_POINT_FORTRAN_REAL16_3BUFF(name) NULL
#endif
#define FLOATING_POINT_FORTRAN_REAL(name) \
FLOATING_POINT_FORTRAN_REAL_PLAIN(name), /* OMPI_OP_BASE_TYPE_REAL */ \
FLOATING_POINT_FORTRAN_REAL2(name), /* OMPI_OP_BASE_TYPE_REAL2 */ \
FLOATING_POINT_FORTRAN_REAL4(name), /* OMPI_OP_BASE_TYPE_REAL4 */ \
FLOATING_POINT_FORTRAN_REAL8(name), /* OMPI_OP_BASE_TYPE_REAL8 */ \
FLOATING_POINT_FORTRAN_REAL16(name) /* OMPI_OP_BASE_TYPE_REAL16 */
#define FLOATING_POINT_FORTRAN_REAL_3BUFF(name) \
FLOATING_POINT_FORTRAN_REAL_PLAIN_3BUFF(name), /* OMPI_OP_BASE_TYPE_REAL */ \
FLOATING_POINT_FORTRAN_REAL2_3BUFF(name), /* OMPI_OP_BASE_TYPE_REAL2 */ \
FLOATING_POINT_FORTRAN_REAL4_3BUFF(name), /* OMPI_OP_BASE_TYPE_REAL4 */ \
FLOATING_POINT_FORTRAN_REAL8_3BUFF(name), /* OMPI_OP_BASE_TYPE_REAL8 */ \
FLOATING_POINT_FORTRAN_REAL16_3BUFF(name) /* OMPI_OP_BASE_TYPE_REAL16 */
/** Fortran double precision ********************************************/
#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION
#define FLOATING_POINT_FORTRAN_DOUBLE_PRECISION(name) \
ompi_op_base_##name##_fortran_double_precision
#define FLOATING_POINT_FORTRAN_DOUBLE_PRECISION_3BUFF(name) \
ompi_op_base_3buff_##name##_fortran_double_precision
#else
#define FLOATING_POINT_FORTRAN_DOUBLE_PRECISION(name) NULL
#define FLOATING_POINT_FORTRAN_DOUBLE_PRECISION_3BUFF(name) NULL
#endif
/** Floating point, including all the Fortran reals *********************/
#define FLOATING_POINT(name) \
ompi_op_base_##name##_float, /* OMPI_OP_BASE_TYPE_FLOAT */\
ompi_op_base_##name##_double, /* OMPI_OP_BASE_TYPE_DOUBLE */\
FLOATING_POINT_FORTRAN_REAL(name), /* OMPI_OP_BASE_TYPE_REAL */ \
FLOATING_POINT_FORTRAN_DOUBLE_PRECISION(name), /* OMPI_OP_BASE_TYPE_DOUBLE_PRECISION */ \
ompi_op_base_##name##_long_double /* OMPI_OP_BASE_TYPE_LONG_DOUBLE */
#define FLOATING_POINT_3BUFF(name) \
ompi_op_base_3buff_##name##_float, /* OMPI_OP_BASE_TYPE_FLOAT */\
ompi_op_base_3buff_##name##_double, /* OMPI_OP_BASE_TYPE_DOUBLE */\
FLOATING_POINT_FORTRAN_REAL_3BUFF(name), /* OMPI_OP_BASE_TYPE_REAL */ \
FLOATING_POINT_FORTRAN_DOUBLE_PRECISION_3BUFF(name), /* OMPI_OP_BASE_TYPE_DOUBLE_PRECISION */ \
ompi_op_base_3buff_##name##_long_double /* OMPI_OP_BASE_TYPE_LONG_DOUBLE */
#define FLOATING_POINT_NULL \
NULL, /* OMPI_OP_BASE_TYPE_FLOAT */ \
NULL, /* OMPI_OP_BASE_TYPE_DOUBLE */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL2 */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL4 */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL8 */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL16 */ \
NULL, /* OMPI_OP_BASE_TYPE_DOUBLE_PRECISION */ \
NULL /* OMPI_OP_BASE_TYPE_LONG_DOUBLE */
#define FLOATING_POINT_NULL_3BUFF \
NULL, /* OMPI_OP_BASE_TYPE_FLOAT */ \
NULL, /* OMPI_OP_BASE_TYPE_DOUBLE */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL2 */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL4 */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL8 */ \
NULL, /* OMPI_OP_BASE_TYPE_REAL16 */ \
NULL, /* OMPI_OP_BASE_TYPE_DOUBLE_PRECISION */ \
NULL /* OMPI_OP_BASE_TYPE_LONG_DOUBLE */
/** Fortran logical *****************************************************/
#if OMPI_HAVE_FORTRAN_LOGICAL
#define FORTRAN_LOGICAL(name) \
ompi_op_base_##name##_fortran_logical /* OMPI_OP_BASE_TYPE_LOGICAL */
#define FORTRAN_LOGICAL_3BUFF(name) \
ompi_op_base_3buff_##name##_fortran_logical /* OMPI_OP_BASE_TYPE_LOGICAL */
#else
#define FORTRAN_LOGICAL(name) NULL
#define FORTRAN_LOGICAL_3BUFF(name) NULL
#endif
#define LOGICAL(name) \
FORTRAN_LOGICAL(name), \
ompi_op_base_##name##_bool /* OMPI_OP_BASE_TYPE_BOOL */
#define LOGICAL_3BUFF(name) \
FORTRAN_LOGICAL_3BUFF(name), \
ompi_op_base_3buff_##name##_bool /* OMPI_OP_BASE_TYPE_BOOL */
#define LOGICAL_NULL \
NULL, /* OMPI_OP_BASE_TYPE_LOGICAL */ \
NULL /* OMPI_OP_BASE_TYPE_BOOL */
#define LOGICAL_NULL_3BUFF \
NULL, /* OMPI_OP_BASE_TYPE_LOGICAL */ \
NULL /* OMPI_OP_BASE_TYPE_BOOL */
/** Fortran complex *****************************************************/
#if OMPI_HAVE_FORTRAN_REAL && OMPI_HAVE_FORTRAN_COMPLEX
#define COMPLEX_PLAIN(name) ompi_op_base_##name##_fortran_complex
#define COMPLEX_PLAIN_3BUFF(name) ompi_op_base_3buff_##name##_fortran_complex
#else
#define COMPLEX_PLAIN(name) NULL
#define COMPLEX_PLAIN_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION && OMPI_HAVE_FORTRAN_COMPLEX
#define COMPLEX_DOUBLE(name) ompi_op_base_##name##_fortran_double_complex
#define COMPLEX_DOUBLE_3BUFF(name) ompi_op_base_3buff_##name##_fortran_double_complex
#else
#define COMPLEX_DOUBLE(name) NULL
#define COMPLEX_DOUBLE_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_REAL4 && OMPI_HAVE_FORTRAN_COMPLEX8
#define COMPLEX8(name) ompi_op_base_##name##_fortran_complex8
#define COMPLEX8_3BUFF(name) ompi_op_base_3buff_##name##_fortran_complex8
#else
#define COMPLEX8(name) NULL
#define COMPLEX8_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_REAL8 && OMPI_HAVE_FORTRAN_COMPLEX16
#define COMPLEX16(name) ompi_op_base_##name##_fortran_complex16
#define COMPLEX16_3BUFF(name) ompi_op_base_3buff_##name##_fortran_complex16
#else
#define COMPLEX16(name) NULL
#define COMPLEX16_3BUFF(name) NULL
#endif
/* If:
- we have fortran REAL*16, *and*
- fortran REAL*16 matches the bit representation of the
corresponding C type, *and*
- we have fortran COMPLEX*32
Only then do we put in function pointers for COMPLEX*32 reductions.
Otherwise, just put in NULL. */
#if OMPI_HAVE_FORTRAN_REAL16 && OMPI_REAL16_MATCHES_C && OMPI_HAVE_FORTRAN_COMPLEX32
#define COMPLEX32(name) ompi_op_base_##name##_fortran_complex32
#define COMPLEX32_3BUFF(name) ompi_op_base_3buff_##name##_fortran_complex32
#else
#define COMPLEX32(name) NULL
#define COMPLEX32_3BUFF(name) NULL
#endif
#define COMPLEX(name) \
COMPLEX_PLAIN(name), /* OMPI_OP_BASE_TYPE_COMPLEX */ \
COMPLEX_DOUBLE(name), /* OMPI_OP_BASE_TYPE_DOUBLE_COMPLEX */ \
COMPLEX8(name), /* OMPI_OP_BASE_TYPE_COMPLEX8 */ \
COMPLEX16(name), /* OMPI_OP_BASE_TYPE_COMPLEX16 */ \
COMPLEX32(name) /* OMPI_OP_BASE_TYPE_COMPLEX32 */
#define COMPLEX_3BUFF(name) \
COMPLEX_PLAIN_3BUFF(name), /* OMPI_OP_BASE_TYPE_COMPLEX */ \
COMPLEX_DOUBLE_3BUFF(name), /* OMPI_OP_BASE_TYPE_DOUBLE_COMPLEX */ \
COMPLEX8_3BUFF(name), /* OMPI_OP_BASE_TYPE_COMPLEX8 */ \
COMPLEX16_3BUFF(name), /* OMPI_OP_BASE_TYPE_COMPLEX16 */ \
COMPLEX32_3BUFF(name) /* OMPI_OP_BASE_TYPE_COMPLEX32 */
#define COMPLEX_NULL \
NULL, /* OMPI_OP_BASE_TYPE_COMPLEX */ \
NULL, /* OMPI_OP_BASE_TYPE_DOUBLE_COMPLEX */ \
NULL, /* OMPI_OP_BASE_TYPE_COMPLEX8 */ \
NULL, /* OMPI_OP_BASE_TYPE_COMPLEX16 */ \
NULL /* OMPI_OP_BASE_TYPE_COMPLEX32 */
#define COMPLEX_NULL_3BUFF \
NULL, /* OMPI_OP_BASE_TYPE_COMPLEX */ \
NULL, /* OMPI_OP_BASE_TYPE_DOUBLE_COMPLEX */ \
NULL, /* OMPI_OP_BASE_TYPE_COMPLEX8 */ \
NULL, /* OMPI_OP_BASE_TYPE_COMPLEX16 */ \
NULL /* OMPI_OP_BASE_TYPE_COMPLEX32 */
/** Byte ****************************************************************/
#define BYTE(name) \
ompi_op_base_##name##_byte /* OMPI_OP_BASE_TYPE_BYTE */
#define BYTE_3BUFF(name) \
ompi_op_base_3buff_##name##_byte /* OMPI_OP_BASE_TYPE_BYTE */
#define BYTE_NULL \
NULL /* OMPI_OP_BASE_TYPE_BYTE */
#define BYTE_NULL_3BUFF \
NULL /* OMPI_OP_BASE_TYPE_BYTE */
/** Fortran "2" types ***************************************************/
#if OMPI_HAVE_FORTRAN_REAL
#define TWOLOC_FORTRAN_2REAL(name) ompi_op_base_##name##_2real
#define TWOLOC_FORTRAN_2REAL_3BUFF(name) ompi_op_base_3buff_##name##_2real
#else
#define TWOLOC_FORTRAN_2REAL(name) NULL
#define TWOLOC_FORTRAN_2REAL_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION
#define TWOLOC_FORTRAN_2DOUBLE_PRECISION(name) ompi_op_base_##name##_2double_precision
#define TWOLOC_FORTRAN_2DOUBLE_PRECISION_3BUFF(name) ompi_op_base_3buff_##name##_2double_precision
#else
#define TWOLOC_FORTRAN_2DOUBLE_PRECISION(name) NULL
#define TWOLOC_FORTRAN_2DOUBLE_PRECISION_3BUFF(name) NULL
#endif
#if OMPI_HAVE_FORTRAN_INTEGER
#define TWOLOC_FORTRAN_2INTEGER(name) ompi_op_base_##name##_2integer
#define TWOLOC_FORTRAN_2INTEGER_3BUFF(name) ompi_op_base_3buff_##name##_2integer
#else
#define TWOLOC_FORTRAN_2INTEGER(name) NULL
#define TWOLOC_FORTRAN_2INTEGER_3BUFF(name) NULL
#endif
/** All "2" types *******************************************************/
#define TWOLOC(name) \
TWOLOC_FORTRAN_2REAL(name), /* OMPI_OP_BASE_TYPE_2REAL */ \
TWOLOC_FORTRAN_2DOUBLE_PRECISION(name), /* OMPI_OP_BASE_TYPE_2DOUBLE_PRECISION */ \
TWOLOC_FORTRAN_2INTEGER(name), /* OMPI_OP_BASE_TYPE_2INTEGER */ \
ompi_op_base_##name##_float_int, /* OMPI_OP_BASE_TYPE_FLOAT_INT */ \
ompi_op_base_##name##_double_int, /* OMPI_OP_BASE_TYPE_DOUBLE_INT */ \
ompi_op_base_##name##_long_int, /* OMPI_OP_BASE_TYPE_LONG_INT */ \
ompi_op_base_##name##_2int, /* OMPI_OP_BASE_TYPE_2INT */ \
ompi_op_base_##name##_short_int, /* OMPI_OP_BASE_TYPE_SHORT_INT */ \
ompi_op_base_##name##_long_double_int /* OMPI_OP_BASE_TYPE_LONG_DOUBLE_INT */
#define TWOLOC_3BUFF(name) \
TWOLOC_FORTRAN_2REAL_3BUFF(name), /* OMPI_OP_BASE_TYPE_2REAL */ \
TWOLOC_FORTRAN_2DOUBLE_PRECISION_3BUFF(name), /* OMPI_OP_BASE_TYPE_2DOUBLE_PRECISION */ \
TWOLOC_FORTRAN_2INTEGER_3BUFF(name), /* OMPI_OP_BASE_TYPE_2INTEGER */ \
ompi_op_base_3buff_##name##_float_int, /* OMPI_OP_BASE_TYPE_FLOAT_INT */ \
ompi_op_base_3buff_##name##_double_int, /* OMPI_OP_BASE_TYPE_DOUBLE_INT */ \
ompi_op_base_3buff_##name##_long_int, /* OMPI_OP_BASE_TYPE_LONG_INT */ \
ompi_op_base_3buff_##name##_2int, /* OMPI_OP_BASE_TYPE_2INT */ \
ompi_op_base_3buff_##name##_short_int, /* OMPI_OP_BASE_TYPE_SHORT_INT */ \
ompi_op_base_3buff_##name##_long_double_int /* OMPI_OP_BASE_TYPE_LONG_DOUBLE_INT */
#define TWOLOC_NULL \
NULL, /* OMPI_OP_BASE_TYPE_2REAL */\
NULL, /* OMPI_OP_BASE_TYPE_2DOUBLE_PRECISION */ \
NULL, /* OMPI_OP_BASE_TYPE_2INTEGER */ \
NULL, /* OMPI_OP_BASE_TYPE_FLOAT_INT */ \
NULL, /* OMPI_OP_BASE_TYPE_DOUBLE_INT */ \
NULL, /* OMPI_OP_BASE_TYPE_LONG_INT */ \
NULL, /* OMPI_OP_BASE_TYPE_2INT */ \
NULL, /* OMPI_OP_BASE_TYPE_SHORT_INT */ \
NULL /* OMPI_OP_BASE_TYPE_LONG_DOUBLE_INT */
#define TWOLOC_NULL_3BUFF \
NULL, /* OMPI_OP_BASE_TYPE_2REAL */\
NULL, /* OMPI_OP_BASE_TYPE_2DOUBLE_PRECISION */ \
NULL, /* OMPI_OP_BASE_TYPE_2INTEGER */ \
NULL, /* OMPI_OP_BASE_TYPE_FLOAT_INT */ \
NULL, /* OMPI_OP_BASE_TYPE_DOUBLE_INT */ \
NULL, /* OMPI_OP_BASE_TYPE_LONG_INT */ \
NULL, /* OMPI_OP_BASE_TYPE_2INT */ \
NULL, /* OMPI_OP_BASE_TYPE_SHORT_INT */ \
NULL /* OMPI_OP_BASE_TYPE_LONG_DOUBLE_INT */
/*
* MPI_OP_NULL
* All types
*/
#define FLAGS_NO_FLOAT \
(OMPI_OP_FLAGS_INTRINSIC | OMPI_OP_FLAGS_ASSOC | OMPI_OP_FLAGS_COMMUTE)
#define FLAGS \
(OMPI_OP_FLAGS_INTRINSIC | OMPI_OP_FLAGS_ASSOC | \
OMPI_OP_FLAGS_FLOAT_ASSOC | OMPI_OP_FLAGS_COMMUTE)
ompi_op_base_handler_fn_t ompi_op_base_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX] =
{
/* Corresponds to MPI_OP_NULL */
{
/* Leaving this empty puts in NULL for all entries */
NULL,
},
/* Corresponds to MPI_MAX */
{
C_INTEGER(max),
FORTRAN_INTEGER(max),
FLOATING_POINT(max),
LOGICAL_NULL,
COMPLEX_NULL,
BYTE_NULL,
TWOLOC_NULL
},
/* Corresponds to MPI_MIN */
{
C_INTEGER(min),
FORTRAN_INTEGER(min),
FLOATING_POINT(min),
LOGICAL_NULL,
COMPLEX_NULL,
BYTE_NULL,
TWOLOC_NULL
},
/* Corresponds to MPI_SUM */
{
C_INTEGER(sum),
FORTRAN_INTEGER(sum),
FLOATING_POINT(sum),
LOGICAL_NULL,
COMPLEX(sum),
BYTE_NULL,
TWOLOC_NULL
},
/* Corresponds to MPI_PROD */
{
C_INTEGER(prod),
FORTRAN_INTEGER(prod),
FLOATING_POINT(prod),
LOGICAL_NULL,
COMPLEX(prod),
BYTE_NULL,
TWOLOC_NULL
},
/* Corresponds to MPI_LAND */
{
C_INTEGER(land),
FORTRAN_INTEGER_NULL,
FLOATING_POINT_NULL,
LOGICAL(land),
COMPLEX_NULL,
BYTE_NULL,
TWOLOC_NULL
},
/* Corresponds to MPI_BAND */
{
C_INTEGER(band),
FORTRAN_INTEGER(band),
FLOATING_POINT_NULL,
LOGICAL_NULL,
COMPLEX_NULL,
BYTE(band),
TWOLOC_NULL
},
/* Corresponds to MPI_LOR */
{
C_INTEGER(lor),
FORTRAN_INTEGER_NULL,
FLOATING_POINT_NULL,
LOGICAL(lor),
COMPLEX_NULL,
BYTE_NULL,
TWOLOC_NULL
},
/* Corresponds to MPI_BOR */
{
C_INTEGER(bor),
FORTRAN_INTEGER(bor),
FLOATING_POINT_NULL,
LOGICAL_NULL,
COMPLEX_NULL,
BYTE(bor),
TWOLOC_NULL
},
/* Corresponds to MPI_LXOR */
{
C_INTEGER(lxor),
FORTRAN_INTEGER_NULL,
FLOATING_POINT_NULL,
LOGICAL(lxor),
COMPLEX_NULL,
BYTE_NULL,
TWOLOC_NULL
},
/* Corresponds to MPI_BXOR */
{
C_INTEGER(bxor),
FORTRAN_INTEGER(bxor),
FLOATING_POINT_NULL,
LOGICAL_NULL,
COMPLEX_NULL,
BYTE(bxor),
TWOLOC_NULL
},
/* Corresponds to MPI_MAXLOC */
{
C_INTEGER_NULL,
FORTRAN_INTEGER_NULL,
FLOATING_POINT_NULL,
LOGICAL_NULL,
COMPLEX_NULL,
BYTE_NULL,
TWOLOC(maxloc),
},
/* Corresponds to MPI_MINLOC */
{
C_INTEGER_NULL,
FORTRAN_INTEGER_NULL,
FLOATING_POINT_NULL,
LOGICAL_NULL,
COMPLEX_NULL,
BYTE_NULL,
TWOLOC(minloc),
},
/* Corresponds to MPI_REPLACE */
{
/* (MPI_ACCUMULATE is handled differently than the other
reductions, so just zero out its function
implementations here to ensure that users don't invoke
MPI_REPLACE with any reduction operations other than
ACCUMULATE) */
NULL,
},
};
ompi_op_base_3buff_handler_fn_t ompi_op_base_3buff_functions[OMPI_OP_BASE_FORTRAN_OP_MAX][OMPI_OP_BASE_TYPE_MAX] =
{
/* Corresponds to MPI_OP_NULL */
{
/* Leaving this empty puts in NULL for all entries */
NULL,
},
/* Corresponds to MPI_MAX */
{
C_INTEGER_3BUFF(max),
FORTRAN_INTEGER_3BUFF(max),
FLOATING_POINT_3BUFF(max),
LOGICAL_NULL_3BUFF,
COMPLEX_NULL_3BUFF,
BYTE_NULL_3BUFF,
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_MIN */
{
C_INTEGER_3BUFF(min),
FORTRAN_INTEGER_3BUFF(min),
FLOATING_POINT_3BUFF(min),
LOGICAL_NULL_3BUFF,
COMPLEX_NULL_3BUFF,
BYTE_NULL_3BUFF,
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_SUM */
{
C_INTEGER_3BUFF(sum),
FORTRAN_INTEGER_3BUFF(sum),
FLOATING_POINT_3BUFF(sum),
LOGICAL_NULL_3BUFF,
COMPLEX_3BUFF(sum),
BYTE_NULL_3BUFF,
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_PROD */
{
C_INTEGER_3BUFF(prod),
FORTRAN_INTEGER_3BUFF(prod),
FLOATING_POINT_3BUFF(prod),
LOGICAL_NULL_3BUFF,
COMPLEX_3BUFF(prod),
BYTE_NULL_3BUFF,
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_LAND */
{
C_INTEGER_3BUFF(land),
FORTRAN_INTEGER_NULL_3BUFF,
FLOATING_POINT_NULL_3BUFF,
LOGICAL_3BUFF(land),
COMPLEX_NULL_3BUFF,
BYTE_NULL_3BUFF,
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_BAND */
{
C_INTEGER_3BUFF(band),
FORTRAN_INTEGER_3BUFF(band),
FLOATING_POINT_NULL_3BUFF,
LOGICAL_NULL_3BUFF,
COMPLEX_NULL_3BUFF,
BYTE_3BUFF(band),
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_LOR */
{
C_INTEGER_3BUFF(lor),
FORTRAN_INTEGER_NULL_3BUFF,
FLOATING_POINT_NULL_3BUFF,
LOGICAL_3BUFF(lor),
COMPLEX_NULL_3BUFF,
BYTE_NULL_3BUFF,
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_BOR */
{
C_INTEGER_3BUFF(bor),
FORTRAN_INTEGER_3BUFF(bor),
FLOATING_POINT_NULL_3BUFF,
LOGICAL_NULL_3BUFF,
COMPLEX_NULL_3BUFF,
BYTE_3BUFF(bor),
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_LXOR */
{
C_INTEGER_3BUFF(lxor),
FORTRAN_INTEGER_NULL_3BUFF,
FLOATING_POINT_NULL_3BUFF,
LOGICAL_3BUFF(lxor),
COMPLEX_NULL_3BUFF,
BYTE_NULL_3BUFF,
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_BXOR */
{
C_INTEGER_3BUFF(bxor),
FORTRAN_INTEGER_3BUFF(bxor),
FLOATING_POINT_NULL_3BUFF,
LOGICAL_NULL_3BUFF,
COMPLEX_NULL_3BUFF,
BYTE_3BUFF(bxor),
TWOLOC_NULL_3BUFF
},
/* Corresponds to MPI_MAXLOC */
{
C_INTEGER_NULL_3BUFF,
FORTRAN_INTEGER_NULL_3BUFF,
FLOATING_POINT_NULL_3BUFF,
LOGICAL_NULL_3BUFF,
COMPLEX_NULL_3BUFF,
BYTE_NULL_3BUFF,
TWOLOC_3BUFF(maxloc),
},
/* Corresponds to MPI_MINLOC */
{
C_INTEGER_NULL_3BUFF,
FORTRAN_INTEGER_NULL_3BUFF,
FLOATING_POINT_NULL_3BUFF,
LOGICAL_NULL_3BUFF,
COMPLEX_NULL_3BUFF,
BYTE_NULL_3BUFF,
TWOLOC_3BUFF(minloc),
},
/* Corresponds to MPI_REPLACE */
{
/* MPI_ACCUMULATE is handled differently than the other
reductions, so just zero out its function
implementations here to ensure that users don't invoke
MPI_REPLACE with any reduction operations other than
ACCUMULATE */
NULL,
},
};
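/* (Sketch, not part of the original commit: how the two dispatch tables
   above are meant to be read.  Each row is indexed by an op's Fortran
   index -- the same value stored in op->o_f_to_c_index -- and each
   column by an OMPI_OP_BASE_TYPE_* value.  A NULL entry means there is
   no handler for that (op, datatype) pair -- e.g., bitwise ops on
   complex types -- and the selection logic expects module-provided
   overrides to follow exactly the same NULL/non-NULL pattern.  For
   example: */
static inline int ompi_op_base_table_has_handler(int fortran_op, int type_index)
{
    return (NULL != ompi_op_base_functions[fortran_op][type_index]);
}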

View File

@ -0,0 +1,350 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Lawrence Livermore National Security, LLC. All
* rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mpi.h"
#include "ompi/constants.h"
#include "opal/util/argv.h"
#include "opal/class/opal_list.h"
#include "opal/class/opal_object.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/util/show_help.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/mca/op/base/functions.h"
/*
* Local types
*/
typedef struct avail_op_t {
opal_list_item_t super;
int ao_priority;
ompi_op_base_module_1_0_0_t *ao_module;
} avail_op_t;
/*
* Local functions
*/
static opal_list_t *check_components(opal_list_t *components,
ompi_op_t *op);
static int check_one_component(ompi_op_t *op,
const mca_base_component_t *component,
ompi_op_base_module_1_0_0_t **module);
static int query(const mca_base_component_t *component,
ompi_op_t *op, int *priority,
ompi_op_base_module_1_0_0_t **module);
static int query_1_0_0(const ompi_op_base_component_1_0_0_t *op_component,
ompi_op_t *op, int *priority,
ompi_op_base_module_1_0_0_t **module);
/*
* Stuff for the OBJ interface
*/
static OBJ_CLASS_INSTANCE(avail_op_t, opal_list_item_t, NULL, NULL);
/*
* This function is called at the initialization time of every
* *intrinsic* MPI_Op (it is *not* used for user-defined MPI_Ops!).
* It is used to select which op component(s) will be active for a
* given MPI_Op.
*
* This selection logic is not for the weak.
*/
int ompi_op_base_op_select(ompi_op_t *op)
{
int i, ret;
char name[MPI_MAX_OBJECT_NAME + 32];
opal_list_t *selectable;
opal_list_item_t *item;
ompi_op_base_module_t *module;
/* Announce */
snprintf(name, sizeof(name), "%s", op->o_name);
name[sizeof(name) - 1] = '\0';
opal_output_verbose(10, ompi_op_base_output,
"op:base:op_select: new op: %s",
name);
/* Make a module for all the base functions so that other modules
can RETAIN it (vs. having NULL for the base function modules,
and forcing all other modules to check for NULL before calling
RETAIN). */
module = OBJ_NEW(ompi_op_base_module_t);
/* Initialize all functions to point to the corresponding base
functions. Set the corresponding module pointers to NULL,
indicating that these are base functions with no corresponding
module. */
memset(&op->o_func, 0, sizeof(op->o_func));
memset(&op->o_3buff_intrinsic, 0, sizeof(op->o_3buff_intrinsic));
for (i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
op->o_func.intrinsic.fns[i] =
ompi_op_base_functions[op->o_f_to_c_index][i];
op->o_func.intrinsic.modules[i] = module;
OBJ_RETAIN(module);
op->o_3buff_intrinsic.fns[i] =
ompi_op_base_3buff_functions[op->o_f_to_c_index][i];
op->o_3buff_intrinsic.modules[i] = module;
OBJ_RETAIN(module);
}
/* Offset the initial OBJ_NEW */
OBJ_RELEASE(module);
/* Check for any components that want to run. It's not an error
if there are none; we'll just use all the base functions in
this case. */
opal_output_verbose(10, ompi_op_base_output,
"op:base:op_select: Checking all available components");
selectable = check_components(&ompi_op_base_components_available, op);
/* Do the selection loop. The selectable list is in priority
order; lowest priority first. */
for (item = opal_list_remove_first(selectable);
NULL != item;
item = opal_list_remove_first(selectable)) {
avail_op_t *avail = (avail_op_t*) item;
/* Enable the module */
if (NULL != avail->ao_module->opm_enable) {
ret = avail->ao_module->opm_enable(avail->ao_module, op);
if (OMPI_SUCCESS != ret) {
/* If the module fails to enable, just release it and move
on */
OBJ_RELEASE(avail->ao_module);
OBJ_RELEASE(avail);
continue;
}
}
/* Copy over the non-NULL pointers */
for (i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
/* 2-buffer variants */
if (NULL != avail->ao_module->opm_fns[i]) {
if (NULL != op->o_func.intrinsic.modules[i]) {
OBJ_RELEASE(op->o_func.intrinsic.modules[i]);
}
op->o_func.intrinsic.fns[i] = avail->ao_module->opm_fns[i];
op->o_func.intrinsic.modules[i] = avail->ao_module;
OBJ_RETAIN(avail->ao_module);
}
/* 3-buffer variants */
if (NULL != avail->ao_module->opm_3buff_fns[i]) {
if (NULL != op->o_3buff_intrinsic.modules[i]) {
OBJ_RELEASE(op->o_3buff_intrinsic.modules[i]);
}
op->o_3buff_intrinsic.fns[i] =
avail->ao_module->opm_3buff_fns[i];
op->o_3buff_intrinsic.modules[i] = avail->ao_module;
OBJ_RETAIN(avail->ao_module);
}
}
/* release the original module reference and the list item */
OBJ_RELEASE(avail->ao_module);
OBJ_RELEASE(avail);
}
/* Done with the list from the check_components() call so release it. */
OBJ_RELEASE(selectable);
/* Sanity check: for intrinsic MPI_Ops, we should have exactly the
same pointers non-NULL as the corresponding initial table row
in ompi_op_base_functions / ompi_op_base_3buff_functions. The
values may be different, of course, but the pattern of
NULL/non-NULL should be exactly the same. */
for (i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
if ((NULL == ompi_op_base_functions[op->o_f_to_c_index][i] &&
NULL != op->o_func.intrinsic.fns[i]) ||
(NULL != ompi_op_base_functions[op->o_f_to_c_index][i] &&
NULL == op->o_func.intrinsic.fns[i])) {
/* Oops -- we found a mismatch. This shouldn't happen; so
go release everything and return an error (yes, re-use
the "i" index because we're going to return without
completing the outer loop). */
for (i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
if (NULL != op->o_func.intrinsic.modules[i]) {
OBJ_RELEASE(op->o_func.intrinsic.modules[i]);
op->o_func.intrinsic.modules[i] = NULL;
}
op->o_func.intrinsic.fns[i] = NULL;
}
return OMPI_ERR_NOT_FOUND;
}
}
return OMPI_SUCCESS;
}
/*
* For each module in the list, check and see if it wants to run, and
* do the resulting priority comparison. Make a list of modules to be
* only those who returned that they want to run, and put them in
* priority order (lowest to highest).
*/
static opal_list_t *check_components(opal_list_t *components,
ompi_op_t *op)
{
int priority;
const mca_base_component_t *component;
opal_list_item_t *item, *item2;
ompi_op_base_module_1_0_0_t *module;
opal_list_t *selectable;
avail_op_t *avail, *avail2;
/* Make a list of the components that query successfully */
selectable = OBJ_NEW(opal_list_t);
/* Scan through the list of components. This nested loop is O(N^2),
but we should never have too many components and/or names, so this
*hopefully* shouldn't matter... */
for (item = opal_list_get_first(components);
item != opal_list_get_end(components);
item = opal_list_get_next(item)) {
component = ((mca_base_component_priority_list_item_t *)
item)->super.cli_component;
priority = check_one_component(op, component, &module);
if (priority >= 0) {
/* We have a component that indicated that it wants to run by
giving us a module */
avail = OBJ_NEW(avail_op_t);
avail->ao_priority = priority;
avail->ao_module = module;
/* Put this item on the list in priority order (lowest
priority first). Should it go first? */
for (item2 = opal_list_get_first(selectable);
item2 != opal_list_get_end(selectable);
item2 = opal_list_get_next(item2)) {
avail2 = (avail_op_t*)item2;
if(avail->ao_priority < avail2->ao_priority) {
opal_list_insert_pos(selectable,
item2, (opal_list_item_t*)avail);
break;
}
}
if (opal_list_get_end(selectable) == item2) {
opal_list_append(selectable, (opal_list_item_t*)avail);
}
}
}
/* All done (even if the list is empty; that's ok) */
return selectable;
}
/*
* Check a single component
*/
static int check_one_component(ompi_op_t *op,
const mca_base_component_t *component,
ompi_op_base_module_1_0_0_t **module)
{
int err;
int priority = -1;
err = query(component, op, &priority, module);
if (OMPI_SUCCESS == err) {
priority = (priority < 100) ? priority : 100;
opal_output_verbose(10, ompi_op_base_output,
"op:base:op_select: component available: %s, priority: %d",
component->mca_component_name, priority);
} else {
priority = -1;
opal_output_verbose(10, ompi_op_base_output,
"op:base:op_select: component not available: %s",
component->mca_component_name);
}
return priority;
}
/**************************************************************************
* Query functions
**************************************************************************/
/*
* Take any version of a op module, query it, and return the right
* module struct
*/
static int query(const mca_base_component_t *component,
ompi_op_t *op,
int *priority, ompi_op_base_module_1_0_0_t **module)
{
*module = NULL;
if (1 == component->mca_type_major_version &&
0 == component->mca_type_minor_version &&
0 == component->mca_type_release_version) {
const ompi_op_base_component_1_0_0_t *op100 =
(ompi_op_base_component_1_0_0_t *) component;
return query_1_0_0(op100, op, priority, module);
}
/* Unknown op API version -- return error */
return OMPI_ERROR;
}
static int query_1_0_0(const ompi_op_base_component_1_0_0_t *component,
ompi_op_t *op, int *priority,
ompi_op_base_module_1_0_0_t **module)
{
ompi_op_base_module_1_0_0_t *ret;
/* There's currently no need for conversion */
ret = component->opc_op_query(op, priority);
if (NULL != ret) {
*module = ret;
return OMPI_SUCCESS;
}
return OMPI_ERROR;
}

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/constants.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "ompi/mca/op/base/static-components.h"
/*
* Globals
*/
int ompi_op_base_output = -1;
bool ompi_op_base_components_opened_valid = false;
opal_list_t ompi_op_base_components_opened;
static void module_constructor(ompi_op_base_module_t *m)
{
m->opm_enable = NULL;
m->opm_op = NULL;
memset(&(m->opm_fns), 0, sizeof(m->opm_fns));
memset(&(m->opm_3buff_fns), 0, sizeof(m->opm_3buff_fns));
}
static void module_constructor_1_0_0(ompi_op_base_module_1_0_0_t *m)
{
m->opm_enable = NULL;
m->opm_op = NULL;
memset(&(m->opm_fns), 0, sizeof(m->opm_fns));
memset(&(m->opm_3buff_fns), 0, sizeof(m->opm_3buff_fns));
}
OBJ_CLASS_INSTANCE(ompi_op_base_module_t, opal_object_t,
module_constructor, NULL);
OBJ_CLASS_INSTANCE(ompi_op_base_module_1_0_0_t, opal_object_t,
module_constructor_1_0_0, NULL);
/*
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
int ompi_op_base_open(void)
{
int value;
/* Debugging / verbose output */
mca_base_param_reg_int_name("op", "base_verbose",
"Verbosity level of the op framework",
false, false, 0, &value);
if (0 != value) {
ompi_op_base_output = opal_output_open(NULL);
} else {
ompi_op_base_output = -1;
}
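/* The "op_base_verbose" MCA parameter registered above can be set at
   run time (e.g., "mpirun --mca op_base_verbose 10 ...") to see the op
   framework's component selection and query output. */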
/* Open up all available components */
if (OPAL_SUCCESS !=
mca_base_components_open("op", ompi_op_base_output,
mca_op_base_static_components,
&ompi_op_base_components_opened, true)) {
return OPAL_ERROR;
}
ompi_op_base_components_opened_valid = true;
/* All done */
return OPAL_SUCCESS;
}

View File

View File

@ -0,0 +1,81 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is an example op component. This Makefile.am is a typical
# example of how to integrate into Open MPI's Automake-based build
# system.
#
# See https://svn.open-mpi.org/trac/ompi/wiki/devel/CreateComponent
# for more details on how to make Open MPI components.
# First, list all .h and .c sources. It is necessary to list all .h
# files so that they will be picked up in the distribution tarball.
sources = \
op_example.h \
op_example_component.c \
op_example_module_bxor.c \
op_example_module_max.c
# Open MPI components can be compiled two ways:
#
# 1. As a standalone dynamic shared object (DSO), sometimes called a
# dynamically loadable library (DLL).
#
# 2. As a static library that is slurped up into the upper-level
# libmpi library (regardless of whether libmpi is a static or dynamic
# library). This is called a "Libtool convenience library".
#
# The component needs to create an output library in this top-level
# component directory, and named either mca_<type>_<name>.la (for DSO
# builds) or libmca_<type>_<name>.la (for static builds). The OMPI
# build system will have set the
# OMPI_BUILD_<framework>_<component>_DSO AM_CONDITIONAL to indicate
# which way this component should be built.
if OMPI_BUILD_op_example_DSO
lib =
lib_sources =
component = mca_op_example.la
component_sources = $(sources)
else
lib = libmca_op_example.la
lib_sources = $(sources)
component =
component_sources =
endif
# Specific information for DSO builds.
#
# The DSO should install itself in $(pkglibdir) (by default,
# $prefix/lib/openmpi).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_op_example_la_SOURCES = $(component_sources)
mca_op_example_la_LDFLAGS = -module -avoid-version
# Specific information for static builds.
#
# Note that we *must* "noinst"; the upper-layer Makefile.am's will
# slurp in the resulting .la library into libmpi.
noinst_LTLIBRARIES = $(lib)
libmca_op_example_la_SOURCES = $(lib_sources)
libmca_op_example_la_LDFLAGS = -module -avoid-version

View File

@ -0,0 +1,128 @@
Copyright 2009 Cisco Systems, Inc. All rights reserved.
This is a simple example op component meant to be a template /
springboard for people to write their own op components. There are
many different ways to write components and modules; this is but one
example.
Before reading this example, note that it is probably more complicated
than many op components need to be. It was done this way
intentionally to show many different OMPI concepts. As with most
programming examples, there are many different ways to program the
same end effect. Feel free to customize / simplify / strip out what
you don't need from this example.
This example component supports a fictitious set of hardware that
provides acceleration for the MPI_MAX and MPI_BXOR MPI_Ops. The
fictitious hardware has multiple versions, too: some versions only
support single precision floating point types for MAX and single
precision integer types for BXOR, whereas later versions support both
single and double precision floating point types for MAX and both
single and double precision integer types for BXOR. Hence, this
example walks through setting up particular MPI_Op function pointers
(a short code sketch follows the list below) based on:
a) hardware availability (e.g., does the node where this MPI process
is running have the relevant hardware/resources?)
b) MPI_Op (e.g., in this example, only MPI_MAX and MPI_BXOR are
supported)
c) datatype (e.g., single/double precision floating point for MAX and
single/double precision integer for BXOR)
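As a rough sketch of what (c) looks like in code (names are taken from
the MAX module in this example component; the exact OMPI_OP_BASE_TYPE_*
indices are assumed), a setup function fills in function pointers only
for the datatypes that the detected hardware supports and leaves the
rest NULL, so that the framework's base functions are used for
everything else:

    module->super.opm_fns[OMPI_OP_BASE_TYPE_FLOAT] = max_float;
    if (mca_op_example_component.double_supported) {
        module->super.opm_fns[OMPI_OP_BASE_TYPE_DOUBLE] = max_double;
    }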
Additionally, there are other considerations that should be factored
in at run time. Hardware accelerators are great, but they do induce
overhead -- for example, some accelerator hardware requires registered
memory. So even if a particular MPI_Op and datatype are supported, it
may not be worthwhile to use the hardware unless the amount of data to
be processed is "big enough" (meaning that the cost of the
registration and/or copy-in/copy-out is amortized) or the memory to
be processed is already registered or is otherwise local to the
accelerator hardware.
Hence, at run-time, the module may choose to use the accelerator
hardware or fail over to a "basic" version of the operation. This
failover strategy is well-supported by the op framework; during the
query process, a component can "stack" itself similar to how POSIX
signal handlers can be stacked. Specifically, op components can cache
other implementations of operation functions for use in the case of
failover. The MAX and BXOR module implementations show one way of
using this method.
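A compact sketch of this stacking pattern (illustrative only, using
names from the BXOR module below): at setup time the module saves the
handler and module that were previously selected for a datatype,
RETAINs the saved module, and installs its own handler; at reduction
time the handler can then fail over to the saved function whenever the
hardware is not worth using:

    /* setup time */
    module->fallback_int = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INT];
    module->fallback_int_module =
        op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INT];
    OBJ_RETAIN(module->fallback_int_module);
    module->super.opm_fns[OMPI_OP_BASE_TYPE_INT] = bxor_int;

    /* at reduction time, inside bxor_int(), when not offloading */
    m->fallback_int(in, out, count, type, m->fallback_int_module);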
Here's a listing of the files in the example component and what they
do:
- configure.params: A meta data file that specifies which files need
to be generated by OMPI's top-level configure file. This file must
be present.
- configure.m4: Tests that get slurped into OMPI's top-level configure
script to determine whether this component will be built or not.
- Makefile.am: Automake makefile that builds this component.
- op_example_component.c: The main "component" source file.
- op_example_module_max.c and op_example_module_bxor.c: The "module"
source files (one for each supported MPI_Op).
- op_example.h: information that is shared between the .c files.
- .ompi_ignore: the presence of this file causes OMPI's autogen.sh to
skip this component in the configure/build/install process (see
below).
To use this example as a template for your component (assume your new
component is named "foo"):
shell$ cd (top_ompi_dir)/ompi/mca/op
shell$ cp -r example foo
shell$ cd foo
Remove the .ompi_ignore file (which makes the component "visible" to
all developers) *OR* add an .ompi_unignore file with one username per
line (as reported by `whoami`). OMPI's autogen.sh will skip any
component with a .ompi_ignore file *unless* there is also an
.ompi_unignore file containing your user ID in it. This is a handy
mechanism to have a component in the tree but have it not built / used
by most other developers:
shell$ rm .ompi_ignore
*OR*
shell$ whoami > .ompi_unignore
Now rename any file that contains "example" in the filename to have
"foo", instead. For example:
shell$ mv op_example_component.c op_foo_component.c
#...etc.
Now edit all the files and s/example/foo/gi. Specifically, replace
all instances of "example" with "foo" in all function names, type
names, header #defines, strings, and global variables.
Now your component should be fully functional (although entirely
renamed as "foo" instead of "example"). You can go to the top-level
OMPI directory and run "autogen.sh" (which will find your component
and add it to the configure/build process) and then "configure ..."
and "make ..." as normal.
shell$ cd (top_ompi_dir)
shell$ ./autogen.sh
# ...lots of output...
shell$ ./configure ...
# ...lots of output...
shell$ make -j 4 all
# ...lots of output...
shell$ make install
# ...lots of output...
After you have installed Open MPI, running "ompi_info" should show
your "foo" component in the output.
shell$ ompi_info | grep op:
MCA op: example (MCA v2.0, API v1.0, Component v1.4)
MCA op: foo (MCA v2.0, API v1.0, Component v1.4)
shell$
If you do not see your foo component, check the above steps, and check
the output of autogen.sh, configure, and make to ensure that "foo" was
found, configured, and built successfully.
Once ompi_info sees your component, start editing the "foo" component
files in a meaningful way.

View File

@ -0,0 +1,70 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Example op component configure.m4 file. This file is slurped in by
# Open MPI's autogen.sh to be part of the top-level configure script.
# This script must define (via AC_DEFUN) an m4 macro named
# MCA_<framework>_<component>_CONFIG that executes either $1 if the
# component wants to build itself, or $2 if the component does not
# want to build itself.
# Do *NOT* invoke AC_MSG_ERROR, or any other macro that will abort
# configure, except upon catastrophic error. For example, it *is* a
# catastrophic error if the user specifically requested your component
# but it cannot be built. It is *not* a catastrophic error if your
# component cannot be built (but was not specifically requested).
# See https://svn.open-mpi.org/trac/ompi/wiki/devel/CreateComponent
# for more details on how to make Open MPI components.
# MCA_op_example_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_op_example_CONFIG],[
# Add checks here for any necessary header files and/or libraries
# that must be present to compile your component.
# This example performs a fairly simple test (checking for the
# "struct sockaddr_in" C type), just for the sake of showing you
# one test and executing either $1 or $2, depending on the output
# of the test.
# check for sockaddr_in (a good sign we have TCP)
AC_CHECK_TYPES([struct sockaddr_in],
[$1],
[$2],
[AC_INCLUDES_DEFAULT
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif])
# Let's pretend that we found version A.B.C of the "libfoo"
# support library that is necessary to compile/link this
# component. We'll AC_DEFINE the A, B, and C values so that they
# can be printed as information MCA parameters via ompi_info. See
# op_example_component.c to see how these values are used.
AC_DEFINE_UNQUOTED(OP_EXAMPLE_LIBFOO_VERSION_MAJOR, ["17"],
[Major version number of the "libfoo" library])
AC_DEFINE_UNQUOTED(OP_EXAMPLE_LIBFOO_VERSION_MINOR, ["38"],
[Minor version number of the "libfoo" library])
AC_DEFINE_UNQUOTED(OP_EXAMPLE_LIBFOO_VERSION_RELEASE, ["4"],
[Release version number of the "libfoo" library])
])dnl

View File

@ -0,0 +1,25 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files for configure to generate for this component.
PARAM_CONFIG_FILES="Makefile"

View File

@ -0,0 +1,105 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_OP_EXAMPLE_EXPORT_H
#define MCA_OP_EXAMPLE_EXPORT_H
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/class/opal_object.h"
#include "ompi/mca/op/op.h"
BEGIN_C_DECLS
/**
* Derive a struct from the base op component struct, allowing us to
* cache some component-specific information on our well-known
* component struct.
*/
typedef struct {
/** The base op component struct */
ompi_op_base_component_1_0_0_t super;
/* What follows is example-component-specific cached information. We
tend to use this scheme (caching information on the example
component itself) instead of lots of individual global
variables for the component. The following data fields are
examples; replace them with whatever is relevant for your
component. */
/** A simple boolean indicating that the hardware is available. */
bool hardware_available;
/** A simple boolean indicating whether double precision is
supported. */
bool double_supported;
} ompi_op_example_component_t;
/**
* Derive a struct from the base op module struct, allowing us to
* cache some module-specific information for BXOR. Note that
* information that should be shared across all modules should be put
* on the example component.
*/
typedef struct {
ompi_op_base_module_1_0_0_t super;
/* Just like the ompi_op_example_component_t, this struct is meant to
cache information on a per-module basis. What follows are
examples; replace them with whatever is relevant for your
component/module. Keep in mind that there will be one distinct
module for each MPI_Op; you may want to have different data
cached on the module, depending on the MPI_Op that it is
supporting. */
double some_bxor_data;
} ompi_op_example_module_bxor_t;
/**
* To use OMPI's OBJ system, you have to declare each "class".
*/
OBJ_CLASS_DECLARATION(ompi_op_example_module_bxor_t);
/**
* Globally exported variable. Note that it is a *example* component
* (defined above), which has the ompi_op_base_component_t as its
* first member. Hence, the MCA/op framework will find the data that
* it expects in the first memory locations, but then the component
* itself can cache additional information after that, which can be used
* by both the component and modules.
*/
OMPI_DECLSPEC extern ompi_op_example_component_t
mca_op_example_component;
/**
* Setup for MPI_MAX and return a module.
*/
OMPI_DECLSPEC ompi_op_base_module_t *
ompi_op_example_setup_max(ompi_op_t *op);
/**
* Setup for MPI_BXOR and return a module.
*/
OMPI_DECLSPEC ompi_op_base_module_t *
ompi_op_example_setup_bxor(ompi_op_t *op);
END_C_DECLS
#endif /* MCA_OP_EXAMPLE_EXPORT_H */

View File

@ -0,0 +1,296 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file
*
* This is the "example" component source code. It contains the
* well-known struct that OMPI will dlsym() (or equivalent) for to
* find how to access the rest of the component and any modules that
* are created.
*/
#include "ompi_config.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/constants.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/mca/op/example/op_example.h"
static int example_component_open(void);
static int example_component_close(void);
static int example_component_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
static struct ompi_op_base_module_1_0_0_t *
example_component_op_query(struct ompi_op_t *op, int *priority);
static int example_component_register(void);
ompi_op_example_component_t mca_op_example_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
{
OMPI_OP_BASE_VERSION_1_0_0,
"example",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
example_component_open,
example_component_close,
NULL,
example_component_register
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
example_component_init_query,
example_component_op_query,
},
/* Now comes the example-component-specific data. In this case,
we'll just leave it blank, defaulting all the values to
0/false/whatever. We'll fill them in with meaningful values
during _component_init_query(). */
};
/*
* Component open
*/
static int example_component_open(void)
{
opal_output(ompi_op_base_output, "example component open");
/* A first level check to see if example is even available in this
process. E.g., you may want to do a first-order check to see
if hardware is available. If so, return OMPI_SUCCESS. If not,
return anything other than OMPI_SUCCESS and the component will
silently be ignored.
Note that if this function returns non-OMPI_SUCCESS, then this
component won't even be shown in ompi_info output (which is
probably not what you want).
*/
return OMPI_SUCCESS;
}
/*
* Component close
*/
static int example_component_close(void)
{
opal_output(ompi_op_base_output, "example component close");
/* If example was opened successfully, close it (i.e., release any
resources that may have been allocated on this component).
Note that _component_close() will always be called at the end
of the process, so it may be invoked after any/all of the other
component functions have been invoked (and possibly even after
modules have been created and/or destroyed). */
return OMPI_SUCCESS;
}
/*
* Register MCA params.
*/
static int example_component_register(void)
{
int val;
char *str;
opal_output(ompi_op_base_output, "example component register");
/* Register any relevant MCA params. At a minimum, perhaps some
information MCA params that return version and capability
information. */
/* For example, let's make a string MCA information parameter
containing the major.minor.release version number from the
libfoo support library (see configure.m4 for how we got these C
macros). */
asprintf(&str, "%s.%s.%s",
OP_EXAMPLE_LIBFOO_VERSION_MAJOR,
OP_EXAMPLE_LIBFOO_VERSION_MINOR,
OP_EXAMPLE_LIBFOO_VERSION_RELEASE);
mca_base_param_reg_string(&mca_op_example_component.super.opc_version,
"libfoo_version",
"Version of the libfoo support library that this component was built against.",
false, true, str, NULL);
free(str);
/* Additionally, since this component is simulating hardware,
let's make MCA params that determine whether a) the hardware
is available, and b) whether double precision floating point
types are supported. This allows you to change the behavior of
this component at run-time (by setting these MCA params at
run-time), simulating different kinds of hardware. */
mca_base_param_reg_int(&mca_op_example_component.super.opc_version,
"hardware_available",
"Whether the hardware is available or not",
false, false, 1, &val);
mca_op_example_component.hardware_available = OPAL_INT_TO_BOOL(val);
mca_base_param_reg_int(&mca_op_example_component.super.opc_version,
"double_supported",
"Whether the double precision data types are supported or not",
false, false, 1, &val);
mca_op_example_component.double_supported = OPAL_INT_TO_BOOL(val);
return OMPI_SUCCESS;
}
/*
* Query whether this component wants to be used in this process.
*/
static int example_component_init_query(bool enable_progress_threads,
bool enable_mpi_threads)
{
opal_output(ompi_op_base_output, "example component init query");
/* Query to see if we have the desired hardware / resources to be
able to perform reduction operations. This is a much more
comprehensive check than _component_open().
If this component can be used in this process, return
OMPI_SUCCESS, meaning that we'll be queried later during
the MPI_Op component selection process via
_component_op_query(). Otherwise, return anything other than
OMPI_SUCCESS and this component will be silently ignored for
the MPI_Op component selection process.
The input parameters enable_progress_threads and
enable_mpi_threads also tell the component the following:
- If enable_progress_threads==true, then the component is
allowed to have a progress thread in the background that is
supported by the OMPI infrastructure (i.e., all of OMPI's
locks and whatnot are active in this build). Note that the
component can *always* have a progress thread in the
background regardless of the value of this parameter as long
as the HAVE_THREADS macro is true and the component uses its
own locking schemes (i.e., does not rely on external
OPAL/ORTE/OMPI data structures to be thread safe). This flag
simply indicates whether OPAL/ORTE/OMPI data structures are
multi-threaded safe and whether multi-threading sync/IPC
mechanisms in the OMPI code base are active.
- If enable_mpi_threads==true, then MPI_THREAD_MULTIPLE is
active.
Note that a component can use these values to deactivate
itself if multi-threading is not supported (keep in mind
that in MPI_THREAD_MULTIPLE scenarios, the same MPI_Op can be
used in multiple, concurrent operations in different threads).
Let's assume that this component does not support
MPI_THREAD_MULTIPLE, and will therefore deactivate itself if
MPI_THREAD_MULTIPLE is used.
*/
/* Note that we used MCA parameters to fill in the
_component.hardware_available and _component.double_supported
values. Typically, you'd probe the hardware here and fill in
those values instead of using MCA parameters (the MCA params
are only used in this example to allow simulating different
types of hardware). */
/* If we have the hardware and are not using MPI_THREAD_MULTIPLE,
return OMPI_SUCCESS (indicating that _component_op_query() will
be called in the future for each intrinsic MPI_Op). Otherwise,
return OMPI_ERR_NOT_SUPPORTED (indicating that this component
will be closed and discarded). */
if (mca_op_example_component.hardware_available && !enable_mpi_threads) {
return OMPI_SUCCESS;
}
return OMPI_ERR_NOT_SUPPORTED;
}
/*
* Query whether this component can be used for a specific op
*/
static struct ompi_op_base_module_1_0_0_t *
example_component_op_query(struct ompi_op_t *op, int *priority)
{
ompi_op_base_module_t *module = NULL;
opal_output(ompi_op_base_output, "example component op query");
/* Sanity check -- although the framework should never invoke the
_component_op_query() on non-intrinsic MPI_Op's, we'll put a
check here just to be sure. */
if (0 == (OMPI_OP_FLAGS_INTRINSIC & op->o_flags)) {
opal_output(0, "example component op query: not an intrinsic MPI_Op -- skipping");
return NULL;
}
/* What follows is an example of how to determine whether your
component supports the queried MPI_Op. You can do this lots of
different ways; this is but one example. */
/* Note that we *do* have the hardware; _component_init_query()
would not have returned OMPI_SUCCESS if we didn't have the
hardware (and therefore this function would never have been
called). So we don't need to check for the hardware again.
Instead, we need to do finer-grained checks (e.g., do we
support this op, and if so, what datatypes are supported?).
So check to see whether this MPI_Op operation is supported on
the hardware that this component supports (which may involve
querying the hardware to see what it is capable of).
You can see what operation is being requested by checking the
"op->o_f_to_c_index" value against the OMPI_OP_BASE_FORTRAN_*
enums. See ompi/mca/op/op.h for a full list of the
OMPI_OP_BASE_FORTRAN_* enums.
In this example component, we support MAX and BXOR. */
switch (op->o_f_to_c_index) {
case OMPI_OP_BASE_FORTRAN_MAX:
/* Corresponds to MPI_MAX */
module = ompi_op_example_setup_max(op);
break;
case OMPI_OP_BASE_FORTRAN_BXOR:
/* Corresponds to MPI_BXOR */
module = ompi_op_example_setup_bxor(op);
break;
}
/* If we got a module from above, we'll return it. Otherwise,
we'll return NULL, indicating that this component does not want
to be considered for selection for this MPI_Op. Note that the
"setup" functions each returned a *example* component pointer
(vs. a *base* component pointer -- where an *example* component
is a base component plus some other module-specific cached
information), so we have to cast it to the right pointer type
before returning. */
if (NULL != module) {
*priority = 50;
}
return (ompi_op_base_module_1_0_0_t *) module;
}

View File

@ -0,0 +1,222 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file
*
* This is the bxor module source code. It contains the "setup"
* functions that will create a module for the MPI_BXOR MPI_Op.
*/
#include "ompi_config.h"
#include "opal/class/opal_object.h"
#include "opal/util/output.h"
#include "ompi/constants.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/mca/op/example/op_example.h"
/**
* Derive a struct from the base op module struct, allowing us to
* cache some module-specific information for BXOR. Note that
* information that should be shared across all modules should be put
* on the example component.
*/
typedef struct {
ompi_op_base_module_1_0_0_t super;
/* Just like the ompi_op_example_component_t, this struct is meant to
cache information on a per-module basis. What follows are
examples; replace them with whatever is relevant for your
component/module. Keep in mind that there will be one distinct
module for each MPI_Op; you may want to have different data
cached on the module, depending on the MPI_Op that it is
supporting.
In this example, we'll keep the fallback function pointers for
several integer types. */
ompi_op_base_handler_fn_t fallback_int;
ompi_op_base_module_t *fallback_int_module;
ompi_op_base_handler_fn_t fallback_long;
ompi_op_base_module_t *fallback_long_module;
ompi_op_base_handler_fn_t fallback_integer;
ompi_op_base_module_t *fallback_integer_module;
} module_bxor_t;
/**
* "Constructor" for the bxor module class
*/
static void module_bxor_constructor(module_bxor_t *m)
{
/* Use this function to initialize any data in the class that is
specific to this class (i.e. do *not* initialize the parent
data members!). */
m->fallback_int = NULL;
m->fallback_int_module = NULL;
m->fallback_long = NULL;
m->fallback_long_module = NULL;
m->fallback_integer = NULL;
m->fallback_integer_module = NULL;
}
/**
* "Destructor" for the bxor module class
*/
static void module_bxor_destructor(module_bxor_t *m)
{
/* Use this function to clean up any data members that may be
necessary. This may include freeing resources and/or setting
members to sentinel values to know that the object has been
destructed. */
m->fallback_int = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_int_module = (ompi_op_base_module_t*) 0xdeadbeef;
m->fallback_long = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_long_module = (ompi_op_base_module_t*) 0xdeadbeef;
m->fallback_integer = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_integer_module = (ompi_op_base_module_t*) 0xdeadbeef;
}
/**
* Setup the class for the bxor module, listing:
* - the name of the class
* - the "parent" of the class
* - function pointer for the constructor (or NULL)
* - function pointer for the destructor (or NULL)
*/
static OBJ_CLASS_INSTANCE(module_bxor_t,
ompi_op_base_module_t,
module_bxor_constructor,
module_bxor_destructor);
/**
* Bxor function for C int
*/
static void bxor_int(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_bxor_t *m = (module_bxor_t*) module;
/* Be chatty to the output, just so that we can see that this
function was called */
opal_output(0, "In example bxor int function");
/* This is where you can decide at run-time whether to use the
hardware or the fallback function. For example, you could have
logic something like this:
extent = *count * sizeof(int);
if (memory_accessible_on_hw(in, extent) &&
memory_accessible_on_hw(out, extent)) {
...do the function on hardware...
} else if (extent >= large_enough) {
...copy host memory -> hardware memory...
...do the function on hardware...
...copy hardware memory -> host memory...
} else {
m->fallback_int(in, out, count, type, m->fallback_int_module);
}
*/
/* But for this example, we'll just call the fallback function to
actually do the work */
m->fallback_int(in, out, count, type, m->fallback_int_module);
}
/**
* Bxor function for C long
*/
static void bxor_long(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_bxor_t *m = (module_bxor_t*) module;
opal_output(0, "In example bxor long function");
/* Just another example function -- similar to bxor_int() */
m->fallback_long(in, out, count, type, m->fallback_long_module);
}
/**
* Bxor function for Fortran INTEGER
*/
static void bxor_integer(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_bxor_t *m = (module_bxor_t*) module;
opal_output(0, "In example bxor integer function");
/* Just another example function -- similar to bxor_int() */
m->fallback_integer(in, out, count, type, m->fallback_integer_module);
}
/**
* Setup function for MPI_BXOR. If we get here, we can assume that a)
* the hardware is present, b) the MPI thread scenario is what we
* want, and c) the BXOR operation is supported. So this function's
* job is to create a module and fill in function pointers for the
* functions that this hardware supports.
*
* This function is *not* allowed to changed the op; it can only read
* it to save functions/modules that were already set. The op base
* will analyze what was returned in the module and re-set values on
* the op if necessary.
*/
ompi_op_base_module_t *ompi_op_example_setup_bxor(ompi_op_t *op)
{
module_bxor_t *module = OBJ_NEW(module_bxor_t);
/* Remember that we created an *example* module (vs. a *base*
module), so we can cache extra information on there that is
specific for the BXOR operation. Let's cache the original
fallback function pointers, that were passed to us in this call
(i.e., they're already assigned on the op). */
/* C int */
module->super.opm_fns[OMPI_OP_BASE_TYPE_INT] = bxor_int;
module->fallback_int = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INT];
module->fallback_int_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INT];
/* If you cache a fallback function, you *must* RETAIN (i.e.,
increase the refcount) its module so that the module knows that
it is being used and won't be freed/destructed. */
OBJ_RETAIN(module->fallback_int_module);
/* C long */
module->super.opm_fns[OMPI_OP_BASE_TYPE_LONG] = bxor_long;
module->fallback_long = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_LONG];
module->fallback_long_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_LONG];
OBJ_RETAIN(module->fallback_long_module);
/* Fortran INTEGER */
module->super.opm_fns[OMPI_OP_BASE_TYPE_INTEGER] = bxor_integer;
module->fallback_integer =
op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INTEGER];
module->fallback_integer_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INTEGER];
OBJ_RETAIN(module->fallback_integer_module);
/* ...not listing the rest of the integer-typed functions in this
example... */
return (ompi_op_base_module_t*) module;
}

View File

@ -0,0 +1,258 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file
*
* This is the max module source code. It contains the "setup"
* functions that will create a module for the MPI_MAX MPI_Op.
*/
#include "ompi_config.h"
#include "opal/class/opal_object.h"
#include "opal/util/output.h"
#include "ompi/constants.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/mca/op/example/op_example.h"
/**
* Derive a struct from the base op module struct, allowing us to
* cache some module-specific information for MAX. Note that
* information that should be shared across all modules should be put
* on the example component.
*/
typedef struct {
ompi_op_base_module_1_0_0_t super;
/* Just like the ompi_op_example_component_t, this struct is meant to
cache information on a per-module basis. What follows are
examples; replace them with whatever is relevant for your
component/module. Keep in mind that there will be one distinct
module for each MPI_Op; you may want to have different data
cached on the module, depending on the MPI_Op that it is
supporting.
In this example, we'll keep the fallback function pointers for
several floating point types. */
ompi_op_base_handler_fn_t fallback_float;
ompi_op_base_module_t *fallback_float_module;
ompi_op_base_handler_fn_t fallback_real;
ompi_op_base_module_t *fallback_real_module;
ompi_op_base_handler_fn_t fallback_double;
ompi_op_base_module_t *fallback_double_module;
ompi_op_base_handler_fn_t fallback_double_precision;
ompi_op_base_module_t *fallback_double_precision_module;
} module_max_t;
/**
* "Constructor" for the max module class
*/
static void module_max_constructor(module_max_t *m)
{
/* Use this function to initialize any data in the class that is
specific to this class (i.e. do *not* initialize the parent
data members!). */
m->fallback_float = NULL;
m->fallback_float_module = NULL;
m->fallback_real = NULL;
m->fallback_real_module = NULL;
m->fallback_double = NULL;
m->fallback_double_module = NULL;
m->fallback_double_precision = NULL;
m->fallback_double_precision_module = NULL;
}
/**
* "Destructor" for the max module class
*/
static void module_max_destructor(module_max_t *m)
{
/* Use this function to clean up any data members that may be
necessary. This may include freeing resources and/or setting
members to sentinel values to know that the object has been
destructed. */
m->fallback_float = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_float_module = (ompi_op_base_module_t*) 0xdeadbeef;
m->fallback_real = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_real_module = (ompi_op_base_module_t*) 0xdeadbeef;
m->fallback_double = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_double_module = (ompi_op_base_module_t*) 0xdeadbeef;
m->fallback_double_precision = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_double_precision_module = (ompi_op_base_module_t*) 0xdeadbeef;
}
/**
* Setup the class for the max module, listing:
* - the name of the class
* - the "parent" of the class
* - function pointer for the constructor (or NULL)
* - function pointer for the destructor (or NULL)
*/
static OBJ_CLASS_INSTANCE(module_max_t,
ompi_op_base_module_t,
module_max_constructor,
module_max_destructor);
/**
* Max function for C float
*/
static void max_float(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_max_t *m = (module_max_t*) module;
/* Be chatty to the output, just so that we can see that this
function was called */
opal_output(0, "In example max float function");
/* This is where you can decide at run-time whether to use the
hardware or the fallback function. For example, you could have
logic something like this:
extent = *count * sizeof(float);
if (memory_accessible_on_hw(in, extent) &&
memory_accessible_on_hw(out, extent)) {
...do the function on hardware...
} else if (extent >= large_enough) {
...copy host memory -> hardware memory...
...do the function on hardware...
...copy hardware memory -> host memory...
} else {
m->fallback_float(in, out, count, type, m->fallback_float_module);
}
*/
/* But for this example, we'll just call the fallback function to
actually do the work */
m->fallback_float(in, out, count, type, m->fallback_float_module);
}
/**
* Max function for C double
*/
static void max_double(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_max_t *m = (module_max_t*) module;
opal_output(0, "In example max double function");
/* Just another example function -- similar to max_float() */
m->fallback_double(in, out, count, type, m->fallback_double_module);
}
/**
* Max function for Fortran REAL
*/
static void max_real(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_max_t *m = (module_max_t*) module;
opal_output(0, "In example max real function");
/* Just another example function -- similar to max_float() */
m->fallback_real(in, out, count, type, m->fallback_real_module);
}
/**
* Max function for Fortran DOUBLE PRECISION
*/
static void max_double_precision(void *in, void *out, int *count,
ompi_datatype_t **type,
ompi_op_base_module_t *module)
{
module_max_t *m = (module_max_t*) module;
opal_output(0, "In example max double precision function");
/* Just another example function -- similar to max_float() */
m->fallback_double_precision(in, out, count, type,
m->fallback_double_precision_module);
}
/**
* Setup function for MPI_MAX. If we get here, we can assume that a)
* the hardware is present, b) the MPI thread scenario is what we
* want, and c) the MAX operation is supported. So this function's
* job is to create a module and fill in function pointers for the
* functions that this hardware supports.
*/
ompi_op_base_module_t *ompi_op_example_setup_max(ompi_op_t *op)
{
module_max_t *module = OBJ_NEW(module_max_t);
/* We definitely support the single precision floating point types */
/* Remember that we created an *example* module (vs. a *base*
module), so we can cache extra information on there that is
specific for the MAX operation. Let's cache the original
fallback function pointers, that were passed to us in this call
(i.e., they're already assigned on the op). */
/* C float */
module->super.opm_fns[OMPI_OP_BASE_TYPE_FLOAT] = max_float;
module->fallback_float = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_FLOAT];
module->fallback_float_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_FLOAT];
/* If you cache a fallback function, you *must* RETAIN (i.e.,
increase the refcount) its module so that the module knows that
it is being used and won't be freed/destructed. */
OBJ_RETAIN(module->fallback_float_module);
/* Fortran REAL */
module->super.opm_fns[OMPI_OP_BASE_TYPE_REAL] = max_real;
module->fallback_real =
op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_REAL];
module->fallback_real_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_REAL];
OBJ_RETAIN(module->fallback_real_module);
/* Does our hardware support double precision? */
if (mca_op_example_component.double_supported) {
/* C double */
module->super.opm_fns[OMPI_OP_BASE_TYPE_DOUBLE] = max_double;
module->fallback_double =
op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_DOUBLE];
module->fallback_double_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_DOUBLE];
OBJ_RETAIN(module->fallback_double_module);
/* Fortran DOUBLE PRECISION */
module->super.opm_fns[OMPI_OP_BASE_TYPE_DOUBLE_PRECISION] =
max_double_precision;
module->fallback_double_precision =
op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_DOUBLE_PRECISION];
module->fallback_double_precision_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_DOUBLE_PRECISION];
OBJ_RETAIN(module->fallback_double_precision_module);
}
/* ...not listing the rest of the floating point-typed functions
in this example... */
return (ompi_op_base_module_t*) module;
}

ompi/mca/op/op.h

@ -0,0 +1,418 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2007-2008 UT-Battelle, LLC
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* MPI_Op back-end operation framework. This framework allows
* component-izing the back-end operations of MPI_Op in order to use
* specialized hardware (e.g., mathematical accelerators). In short:
* each MPI_Op contains a table of function pointers; one for
* implementing the operation on each predefined datatype.
*
* The MPI interface provides error checking and error handler
* invocation, but the op components provide all other functionality.
*
* Component selection is done on a per-MPI_Op basis when each MPI_Op
* is created. All MPI_Ops go through the selection process, even
* user-defined MPI_Ops -- although it is expected that most (all?)
* op components will only be able to handle the predefined MPI_Ops.
*
* The general sequence of usage for the op framework is:
*
* 1. ompi_op_base_open() is invoked during MPI_INIT to find/open all
* op components.
*
* 2. ompi_op_base_find_available() is invoked during MPI_INIT to call
* each successfully opened op component's opc_init_query() function.
* All op components that return OMPI_SUCCESS are kept; all others are
* closed and removed from the process.
*
* 3. ompi_op_base_op_select() is invoked during MPI_INIT for each
* predefined MPI_Op (e.g., MPI_SUM). This function will call each
* available op component's opc_op_query() function to see if this
* component wants to provide a module for one or more of the function
* pointers on this MPI_Op. Priorities are used to rank returned
* modules; the module with the highest priority has its function
* pointers set in the MPI_Op function table.
*
* Note that a module may only have *some* non-NULL function pointers
* (i.e., for the functions that it can support). For example, some
* modules may only support operations on single-precision floating
* point datatypes. These modules would provide function pointers for
* these datatypes and NULL for all the rest. The op framework will
* mix-n-match function pointers between modules to obtain a full set
* of non-NULL function pointers for a given MPI_Op (note that the op
* base provides a complete set of functions for the MPI_Op, usually a
* simple C loop around the operation, such as "+=" -- so even if
* there is no specialized op component available, there will *always*
* be a full set of MPI_Op function pointers). The op framework will
* OBJ_RETAIN an op module once for each function pointer where it is
* used on a given MPI_Op.
*
* Note that this scheme can result in up to N different modules being
* used for a single MPI_Op, one per needed datatype function.
*
* 4. Finally, during MPI_FINALIZE, ompi_op_base_close() is invoked to
* close all available op components.
*/
#include "opal/mca/mca.h"
#ifndef MCA_OP_H
#define MCA_OP_H
/*
* This file includes some basic struct declarations (but not
* definitions) just so that we can avoid including files like op/op.h
* and datatype/datatype.h, which would create #include file loops.
*/
#include "ompi/types.h"
BEGIN_C_DECLS
/**
* Corresponding to the types that we can reduce over. See
* MPI-1:4.9.2, p114-115 and
* MPI-2:4.15, p76-77
*/
enum {
/** C integer: unsigned char */
OMPI_OP_BASE_TYPE_UNSIGNED_CHAR,
/** C integer: signed char */
OMPI_OP_BASE_TYPE_SIGNED_CHAR,
/** C integer: int */
OMPI_OP_BASE_TYPE_INT,
/** C integer: long */
OMPI_OP_BASE_TYPE_LONG,
/** C integer: short */
OMPI_OP_BASE_TYPE_SHORT,
/** C integer: unsigned short */
OMPI_OP_BASE_TYPE_UNSIGNED_SHORT,
/** C integer: unsigned */
OMPI_OP_BASE_TYPE_UNSIGNED,
/** C integer: unsigned long */
OMPI_OP_BASE_TYPE_UNSIGNED_LONG,
/** C integer: long long int (optional) */
OMPI_OP_BASE_TYPE_LONG_LONG_INT,
/** C integer: unsigned long long (optional) */
OMPI_OP_BASE_TYPE_UNSIGNED_LONG_LONG,
/** Fortran integer */
OMPI_OP_BASE_TYPE_INTEGER,
/** Fortran integer*1 */
OMPI_OP_BASE_TYPE_INTEGER1,
/** Fortran integer*2 */
OMPI_OP_BASE_TYPE_INTEGER2,
/** Fortran integer*4 */
OMPI_OP_BASE_TYPE_INTEGER4,
/** Fortran integer*8 */
OMPI_OP_BASE_TYPE_INTEGER8,
/** Fortran integer*16 */
OMPI_OP_BASE_TYPE_INTEGER16,
/** Floating point: float */
OMPI_OP_BASE_TYPE_FLOAT,
/** Floating point: double */
OMPI_OP_BASE_TYPE_DOUBLE,
/** Floating point: real */
OMPI_OP_BASE_TYPE_REAL,
/** Floating point: real*2 */
OMPI_OP_BASE_TYPE_REAL2,
/** Floating point: real*4 */
OMPI_OP_BASE_TYPE_REAL4,
/** Floating point: real*8 */
OMPI_OP_BASE_TYPE_REAL8,
/** Floating point: real*16 */
OMPI_OP_BASE_TYPE_REAL16,
/** Floating point: double precision */
OMPI_OP_BASE_TYPE_DOUBLE_PRECISION,
/** Floating point: long double */
OMPI_OP_BASE_TYPE_LONG_DOUBLE,
/** Logical */
OMPI_OP_BASE_TYPE_LOGICAL,
/** Bool */
OMPI_OP_BASE_TYPE_BOOL,
/** Complex */
OMPI_OP_BASE_TYPE_COMPLEX,
/** Double complex */
OMPI_OP_BASE_TYPE_DOUBLE_COMPLEX,
/** Complex8 */
OMPI_OP_BASE_TYPE_COMPLEX8,
/** Complex16 */
OMPI_OP_BASE_TYPE_COMPLEX16,
/** Complex32 */
OMPI_OP_BASE_TYPE_COMPLEX32,
/** Byte */
OMPI_OP_BASE_TYPE_BYTE,
/** 2 location Fortran: 2 real */
OMPI_OP_BASE_TYPE_2REAL,
/** 2 location Fortran: 2 double precision */
OMPI_OP_BASE_TYPE_2DOUBLE_PRECISION,
/** 2 location Fortran: 2 integer */
OMPI_OP_BASE_TYPE_2INTEGER,
/** 2 location C: float int */
OMPI_OP_BASE_TYPE_FLOAT_INT,
/** 2 location C: double int */
OMPI_OP_BASE_TYPE_DOUBLE_INT,
/** 2 location C: long int */
OMPI_OP_BASE_TYPE_LONG_INT,
/** 2 location C: int int */
OMPI_OP_BASE_TYPE_2INT,
/** 2 location C: short int */
OMPI_OP_BASE_TYPE_SHORT_INT,
/** 2 location C: long double int */
OMPI_OP_BASE_TYPE_LONG_DOUBLE_INT,
/** wchar_t */
OMPI_OP_BASE_TYPE_WCHAR,
/** Maximum type */
OMPI_OP_BASE_TYPE_MAX
};
/**
* Fortran handles; must be [manually set to be] equivalent to the
* values in mpif.h.
*/
enum {
/** Corresponds to Fortran MPI_OP_NULL */
OMPI_OP_BASE_FORTRAN_NULL = 0,
/** Corresponds to Fortran MPI_MAX */
OMPI_OP_BASE_FORTRAN_MAX,
/** Corresponds to Fortran MPI_MIN */
OMPI_OP_BASE_FORTRAN_MIN,
/** Corresponds to Fortran MPI_SUM */
OMPI_OP_BASE_FORTRAN_SUM,
/** Corresponds to Fortran MPI_PROD */
OMPI_OP_BASE_FORTRAN_PROD,
/** Corresponds to Fortran MPI_LAND */
OMPI_OP_BASE_FORTRAN_LAND,
/** Corresponds to Fortran MPI_BAND */
OMPI_OP_BASE_FORTRAN_BAND,
/** Corresponds to Fortran MPI_LOR */
OMPI_OP_BASE_FORTRAN_LOR,
/** Corresponds to Fortran MPI_BOR */
OMPI_OP_BASE_FORTRAN_BOR,
/** Corresponds to Fortran MPI_LXOR */
OMPI_OP_BASE_FORTRAN_LXOR,
/** Corresponds to Fortran MPI_BXOR */
OMPI_OP_BASE_FORTRAN_BXOR,
/** Corresponds to Fortran MPI_MAXLOC */
OMPI_OP_BASE_FORTRAN_MAXLOC,
/** Corresponds to Fortran MPI_MINLOC */
OMPI_OP_BASE_FORTRAN_MINLOC,
/** Corresponds to Fortran MPI_REPLACE */
OMPI_OP_BASE_FORTRAN_REPLACE,
/** Maximum value */
OMPI_OP_BASE_FORTRAN_OP_MAX
};
/**
* Pre-declare this so that we can pass it as an argument to the
* typedef'ed functions.
*/
struct ompi_op_base_module_1_0_0_t;
typedef struct ompi_op_base_module_1_0_0_t ompi_op_base_module_t;
/**
* Typedef for 2-buffer op functions.
*
* We don't use MPI_User_function because this would create a
* confusing dependency loop between this file and mpi.h. So this is
* repeated code, but it's better this way (and this typedef will
* never change, so there's not much of a maintenance worry).
*/
typedef void (*ompi_op_base_handler_fn_1_0_0_t)(void *, void *, int *,
struct ompi_datatype_t **,
struct ompi_op_base_module_1_0_0_t *);
typedef ompi_op_base_handler_fn_1_0_0_t ompi_op_base_handler_fn_t;
/*
* Typedef for 3-buffer (two input and one output) op functions.
*/
typedef void (*ompi_op_base_3buff_handler_fn_1_0_0_t)(void *restrict,
void *restrict,
void *restrict, int *,
struct ompi_datatype_t **,
struct ompi_op_base_module_1_0_0_t *);
typedef ompi_op_base_3buff_handler_fn_1_0_0_t ompi_op_base_3buff_handler_fn_t;
/**
* Op component initialization
*
* Initialize the given op component. This function should initialize
* any component-level data. It will be called exactly once during
* MPI_INIT.
*
* @note The component framework is not lazily opened, so attempts
* should be made to minimize the amount of memory allocated during
* this function.
*
* @param[in] enable_progress_threads True if the component needs to
* support progress threads
* @param[in] enable_mpi_threads True if the component needs to
* support MPI_THREAD_MULTIPLE
*
* @retval OMPI_SUCCESS Component successfully initialized
* @retval OMPI_ERROR An unspecified error occurred
*/
typedef int (*ompi_op_base_component_init_query_fn_t)
(bool enable_progress_threads, bool enable_mpi_threads);
/**
* Query whether a component is available for a specific MPI_Op.
*
* If the component is available, an object should be allocated and
* returned (with refcount at 1). The module will not be used for
* reduction operations until module_enable() is called on the module,
* but may be destroyed (via OBJ_RELEASE) either before or after
* module_enable() is called. If the module needs to release
* resources obtained during query(), it should do so in the module
* destructor.
*
* A component may return NULL from this function to indicate that it
* does not wish to run, or it may return an error from module_enable().
*
* @param[in] op The MPI_Op being created
* @param[out] priority Priority setting for component on
* this op
*
* @returns An initialized module structure if the component can
* provide a module with the requested functionality or NULL if the
* component should not be used for the given MPI_Op.
*/
typedef struct ompi_op_base_module_1_0_0_t *
(*ompi_op_base_component_op_query_1_0_0_fn_t)
(struct ompi_op_t *op, int *priority);
/**
* Op component interface.
*
* Component interface for the op framework. A public instance of
* this structure, called mca_op_[component_name]_component, must
* exist in any op component.
*/
typedef struct ompi_op_base_component_1_0_0_t {
/** Base component description */
mca_base_component_t opc_version;
/** Base component data block */
mca_base_component_data_t opc_data;
/** Component initialization function */
ompi_op_base_component_init_query_fn_t opc_init_query;
/** Query whether component is useable for given op */
ompi_op_base_component_op_query_1_0_0_fn_t opc_op_query;
} ompi_op_base_component_1_0_0_t;
/** Per guidance in mca.h, use the unversioned struct name if you just
want to always keep up with the most recent version of the
interface. */
typedef struct ompi_op_base_component_1_0_0_t ompi_op_base_component_t;
/**
* Module initialization function. Should return OPAL_SUCCESS if
* everything goes ok. This function can be NULL in the module struct
* if the module doesn't need to do anything between the component
* query function and being invoked for MPI_Op operations.
*/
typedef int (*ompi_op_base_module_enable_1_0_0_fn_t)
(struct ompi_op_base_module_1_0_0_t *module,
struct ompi_op_t *op);
/**
* Module struct
*/
typedef struct ompi_op_base_module_1_0_0_t {
/** Op modules all inherit from opal_object */
opal_object_t super;
/** Enable function called when an op module is (possibly) going
to be used for the given MPI_Op */
ompi_op_base_module_enable_1_0_0_fn_t opm_enable;
/** Just for reference -- a pointer to the MPI_Op that this module
is being used for */
struct ompi_op_t *opm_op;
/** Function pointers for all the different datatypes to be used
with the MPI_Op that this module is used with */
ompi_op_base_handler_fn_1_0_0_t opm_fns[OMPI_OP_BASE_TYPE_MAX];
ompi_op_base_3buff_handler_fn_1_0_0_t opm_3buff_fns[OMPI_OP_BASE_TYPE_MAX];
} ompi_op_base_module_1_0_0_t;
/**
* Declare the module as a class, unversioned
*/
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_op_base_module_t);
/**
* Declare the module as a class, versioned
*/
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_op_base_module_1_0_0_t);
/**
* Struct that is used in op.h to hold all the function pointers and
* pointers to the corresponding modules (so that we can properly
* RETAIN/RELEASE them)
*/
typedef struct ompi_op_base_op_fns_1_0_0_t {
ompi_op_base_handler_fn_1_0_0_t fns[OMPI_OP_BASE_TYPE_MAX];
ompi_op_base_module_t *modules[OMPI_OP_BASE_TYPE_MAX];
} ompi_op_base_op_fns_1_0_0_t;
typedef ompi_op_base_op_fns_1_0_0_t ompi_op_base_op_fns_t;
/**
* Struct that is used in op.h to hold all the function pointers and
* pointers to the corresponding modules (so that we can properly
* RETAIN/RELEASE them)
*/
typedef struct ompi_op_base_op_3buff_fns_1_0_0_t {
ompi_op_base_3buff_handler_fn_1_0_0_t fns[OMPI_OP_BASE_TYPE_MAX];
ompi_op_base_module_t *modules[OMPI_OP_BASE_TYPE_MAX];
} ompi_op_base_op_3buff_fns_1_0_0_t;
typedef ompi_op_base_op_3buff_fns_1_0_0_t ompi_op_base_op_3buff_fns_t;
/*
* Macro for use in components that are of type op v1.0.0
*/
#define OMPI_OP_BASE_VERSION_1_0_0 \
MCA_BASE_VERSION_2_0_0, \
"op", 1, 0, 0
END_C_DECLS
#endif /* MCA_OP_H */
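
The mix-and-match selection step described in the header comment above does not appear in this excerpt; a minimal, hypothetical sketch of that overlay (overlay_module_fns is an illustrative name, not a symbol from this commit) could look like the following, using only the types declared in op.h:

    /* Hypothetical sketch: copy a module's non-NULL function pointers into
       the op's intrinsic table and RETAIN the module once per slot where it
       is used.  Releasing any previously installed module for that slot is
       one plausible bookkeeping choice. */
    static void overlay_module_fns(ompi_op_t *op, ompi_op_base_module_t *module)
    {
        int i;
        for (i = 0; i < OMPI_OP_BASE_TYPE_MAX; ++i) {
            if (NULL != module->opm_fns[i]) {
                if (NULL != op->o_func.intrinsic.modules[i]) {
                    OBJ_RELEASE(op->o_func.intrinsic.modules[i]);
                }
                op->o_func.intrinsic.fns[i] = module->opm_fns[i];
                op->o_func.intrinsic.modules[i] = module;
                OBJ_RETAIN(module);
            }
        }
    }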



@ -0,0 +1 @@
jsquyres


@ -0,0 +1,54 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
op_x86.h \
op_x86_component.c \
op_x86_module_sum.c
if OMPI_BUILD_op_x86_DSO
lib =
lib_sources =
component = mca_op_x86.la
component_sources = $(sources)
else
lib = libmca_op_x86.la
lib_sources = $(sources)
component =
component_sources =
endif
# Specific information for DSO builds.
#
# The DSO should install itself in $(pkglibdir) (by default,
# $prefix/lib/openmpi).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_op_x86_la_SOURCES = $(component_sources)
mca_op_x86_la_LDFLAGS = -module -avoid-version
# Specific information for static builds.
#
# Note that we *must* "noinst"; the upper-layer Makefile.am's will
# slurp in the resulting .la library into libmpi.
noinst_LTLIBRARIES = $(lib)
libmca_op_x86_la_SOURCES = $(lib_sources)
libmca_op_x86_la_LDFLAGS = -module -avoid-version


@ -0,0 +1,33 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_op_x86_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_op_x86_CONFIG],[
# check for sockaddr_in (a good sign we have TCP)
AC_CHECK_TYPES([struct sockaddr_in],
[$1],
[$2],
[AC_INCLUDES_DEFAULT
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif])
])dnl


@ -0,0 +1,25 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files for configure to generate for this component.
PARAM_CONFIG_FILES="Makefile"

ompi/mca/op/x86/op_x86.h

@ -0,0 +1,80 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_OP_X86_EXPORT_H
#define MCA_OP_X86_EXPORT_H
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/class/opal_object.h"
#include "ompi/mca/op/op.h"
BEGIN_C_DECLS
/**
* Flags for each hardware type
*/
typedef enum {
OP_X86_HW_FLAGS_MMX = 1,
OP_X86_HW_FLAGS_MMX2 = 2,
OP_X86_HW_FLAGS_SSE = 4,
OP_X86_HW_FLAGS_SSE2 = 8,
OP_X86_HW_FLAGS_SSE3 = 16
} op_x86_hw_flags_t;
/**
* Derive a struct from the base op component struct, allowing us to
* cache some component-specific information on our well-known
* component struct.
*/
typedef struct {
/** The base op component struct */
ompi_op_base_component_1_0_0_t super;
/* What hardware do we have? */
op_x86_hw_flags_t oxc_hw_flags;
} ompi_op_x86_component_t;
/**
* Derive a struct from the base op module struct, allowing us to
* cache some module-specific information for SUM.
*/
typedef struct {
ompi_op_base_module_1_0_0_t super;
/* JMS need anything here? */
} ompi_op_x86_module_sum_t;
OBJ_CLASS_DECLARATION(ompi_op_x86_module_sum_t);
/**
* Well-known component instance
*/
OMPI_DECLSPEC extern ompi_op_x86_component_t mca_op_x86_component;
/**
* Setup for MPI_SUM and return a module.
*/
OMPI_DECLSPEC ompi_op_base_module_t *ompi_op_x86_setup_sum(ompi_op_t *op);
END_C_DECLS
#endif /* MCA_OP_X86_EXPORT_H */


@ -0,0 +1,246 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file
*
* This is the "x86" component source code. It contains the
* well-known struct that OMPI will dlsym() (or equivalent) for to
* find how to access the rest of the component and any modules that
* are created.
*/
#include "ompi_config.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/constants.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/mca/op/x86/op_x86.h"
static int x86_component_open(void);
static int x86_component_close(void);
static int x86_component_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
static struct ompi_op_base_module_1_0_0_t *
x86_component_op_query(struct ompi_op_t *op, int *priority);
static int x86_component_register(void);
ompi_op_x86_component_t mca_op_x86_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
{
OMPI_OP_BASE_VERSION_1_0_0,
"x86",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
x86_component_open,
x86_component_close,
NULL,
x86_component_register
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
x86_component_init_query,
x86_component_op_query,
},
/* Now comes the x86-component-specific data. In this case,
we'll just leave it blank, defaulting all the values to
0/false/whatever. We'll fill them in with meaningful values
during _component_init_query(). */
};
/*
* Component open
*/
static int x86_component_open(void)
{
opal_output(ompi_op_base_output, "x86 component open");
/* A first level check to see if x86 is even available in this
process. E.g., you may want to do a first-order check to see
if hardware is available. If so, return OMPI_SUCCESS. If not,
return anything other than OMPI_SUCCESS and the component will
silently be ignored.
Note that if this function returns non-OMPI_SUCCESS, then this
component won't even be shown in ompi_info output (which is
probably not what you want).
*/
return OMPI_SUCCESS;
}
/*
* Component close
*/
static int x86_component_close(void)
{
opal_output(ompi_op_base_output, "x86 component close");
/* If x86 was opened successfully, close it (i.e., release any
resources that may have been allocated on this component).
Note that _component_close() will always be called at the end
of the process, so it may have been after any/all of the other
component functions have been invoked (and possibly even after
modules have been created and/or destroyed). */
return OMPI_SUCCESS;
}
/*
* Probe the hardware and see what we have
*/
static void hardware_probe(void)
{
/* ... JMS fill in here ... */
}
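/*
 * Editor's sketch (hypothetical, not part of this commit): on GCC-style
 * compilers the probe above could be filled in with <cpuid.h>, mapping
 * CPUID feature bits onto oxc_hw_flags.  __get_cpuid() and the bit_*
 * macros come from GCC's cpuid.h; MMX2 is omitted because it has no
 * single CPUID bit.  Kept under #if 0, mirroring the #if 0 style used
 * in the sum module.
 */
#if 0
#include <cpuid.h>
static void hardware_probe_sketch(void)
{
    unsigned int eax, ebx, ecx, edx;
    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        if (edx & bit_MMX)  mca_op_x86_component.oxc_hw_flags |= OP_X86_HW_FLAGS_MMX;
        if (edx & bit_SSE)  mca_op_x86_component.oxc_hw_flags |= OP_X86_HW_FLAGS_SSE;
        if (edx & bit_SSE2) mca_op_x86_component.oxc_hw_flags |= OP_X86_HW_FLAGS_SSE2;
        if (ecx & bit_SSE3) mca_op_x86_component.oxc_hw_flags |= OP_X86_HW_FLAGS_SSE3;
    }
}
#endif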
/*
* Register MCA params.
*/
static int x86_component_register(void)
{
int val;
opal_output(ompi_op_base_output, "x86 component register");
/* Probe the hardware and see what we have */
hardware_probe();
val = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_MMX));
mca_base_param_reg_int(&mca_op_x86_component.super.opc_version,
"mmx_available",
"Whether the hardware supports MMX or not",
false, false, val, NULL);
val = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_MMX2));
mca_base_param_reg_int(&mca_op_x86_component.super.opc_version,
"mmx2_available",
"Whether the hardware supports MMX2 or not",
false, false, val, NULL);
val = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_SSE));
mca_base_param_reg_int(&mca_op_x86_component.super.opc_version,
"sse_available",
"Whether the hardware supports SSE or not",
false, false, val, NULL);
val = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_SSE2));
mca_base_param_reg_int(&mca_op_x86_component.super.opc_version,
"sse2_available",
"Whether the hardware supports SSE2 or not",
false, false, val, NULL);
val = (0 != (mca_op_x86_component.oxc_hw_flags & OP_X86_HW_FLAGS_SSE3));
mca_base_param_reg_int(&mca_op_x86_component.super.opc_version,
"sse3_available",
"Whether the hardware supports SSE3 or not",
false, false, val, NULL);
return OMPI_SUCCESS;
}
/*
* Query whether this component wants to be used in this process.
*/
static int x86_component_init_query(bool enable_progress_threads,
bool enable_mpi_threads)
{
opal_output(ompi_op_base_output, "x86 component init query");
/* If we have any hardware and we're not threaded, success */
if (0 != mca_op_x86_component.oxc_hw_flags && !enable_mpi_threads) {
return OMPI_SUCCESS;
}
return OMPI_ERR_NOT_SUPPORTED;
}
/*
* Query whether this component can be used for a specific op
*/
static struct ompi_op_base_module_1_0_0_t *
x86_component_op_query(struct ompi_op_t *op, int *priority)
{
ompi_op_base_module_t *module = NULL;
opal_output(ompi_op_base_output, "x86 component op query");
/* Sanity check -- although the framework should never invoke the
_component_op_query() on non-intrinsic MPI_Op's, we'll put a
check here just to be sure. */
if (0 == (OMPI_OP_FLAGS_INTRINSIC & op->o_flags)) {
opal_output(0, "x86 component op query: not an intrinsic MPI_Op -- skipping");
return NULL;
}
/* What follows is an example of how to determine whether your
component supports the queried MPI_Op. You can do this lots of
different ways; this is but one example. */
/* Note that we *do* have the hardware; _component_init_query()
would not have returned OMPI_SUCCESS if we didn't have the
hardware (and therefore this function would never have been
called). So we don't need to check for the hardware again.
Instead, we need to do finer-grained checks (e.g., do we
support this op, and if so, what datatypes are supported?).
So check to see whether this MPI_Op operation is supported on
the hardware that this component supports (which may involve
querying the hardware to see what it is capable of).
You can see what operation is being requested by checking the
"op->o_f_to_c_index" value against the OMPI_OP_BASE_FORTRAN_*
enums. See ompi/mca/op/op.h for a full list of the
OMPI_OP_BASE_FORTRAN_* enums.
In this x86 component, we only support SUM. */
switch (op->o_f_to_c_index) {
case OMPI_OP_BASE_FORTRAN_SUM:
/* Corresponds to MPI_SUM */
module = ompi_op_x86_setup_sum(op);
break;
}
/* If we got a module from above, we'll return it. Otherwise,
we'll return NULL, indicating that this component does not want
to be considered for selection for this MPI_Op. Note that the
"setup" functions each returned a *x86* component pointer
(vs. a *base* component pointer -- where an *x86* component
is a base component plus some other module-specific cached
information), so we have to cast it to the right pointer type
before returning. */
if (NULL != module) {
*priority = 25;
}
return (ompi_op_base_module_1_0_0_t *) module;
}


@ -0,0 +1,207 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file
*
* This is the sum module source code. It contains the "setup"
* functions that will create a module for the MPI_SUM MPI_Op.
*/
#include "ompi_config.h"
#include "opal/class/opal_object.h"
#include "opal/util/output.h"
#include "ompi/constants.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/mca/op/x86/op_x86.h"
/**
* SUM module struct, including local cached info
*/
typedef struct {
ompi_op_base_module_1_0_0_t super;
/* Fallback function pointers and modules. Only doing a few types
to begin with... will fill in others once we have figured out
the basics of the assembly stuff. */
ompi_op_base_handler_fn_t fallback_float;
ompi_op_base_module_t *fallback_float_module;
ompi_op_base_handler_fn_t fallback_short;
ompi_op_base_module_t *fallback_short_module;
ompi_op_base_handler_fn_t fallback_int;
ompi_op_base_module_t *fallback_int_module;
ompi_op_base_handler_fn_t fallback_long;
ompi_op_base_module_t *fallback_long_module;
} module_sum_t;
/**
* Sum module constructor
*/
static void module_sum_constructor(module_sum_t *m)
{
m->fallback_float = NULL;
m->fallback_float_module = NULL;
m->fallback_short = NULL;
m->fallback_short_module = NULL;
m->fallback_int = NULL;
m->fallback_int_module = NULL;
m->fallback_long = NULL;
m->fallback_long_module = NULL;
}
/**
* Sum module destructor
*/
static void module_sum_destructor(module_sum_t *m)
{
m->fallback_float = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_float_module = (ompi_op_base_module_t*) 0xdeadbeef;
m->fallback_short = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_short_module = (ompi_op_base_module_t*) 0xdeadbeef;
m->fallback_int = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_int_module = (ompi_op_base_module_t*) 0xdeadbeef;
m->fallback_long = (ompi_op_base_handler_fn_t) 0xdeadbeef;
m->fallback_long_module = (ompi_op_base_module_t*) 0xdeadbeef;
}
/**
* Setup the class for the sum module, listing:
* - the name of the class
* - the "parent" of the class
* - function pointer for the constructor (or NULL)
* - function pointer for the destructor (or NULL)
*/
static OBJ_CLASS_INSTANCE(module_sum_t,
ompi_op_base_module_t,
module_sum_constructor,
module_sum_destructor);
/**
* Sum function for C float
*/
static void sum_float(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_sum_t *m = (module_sum_t*) module;
/* Be chatty to the output, just so that we can see that this
function was called */
opal_output(0, "In x86 sum float function");
}
/**
* Sum function for C short
*/
static void sum_short(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_sum_t *m = (module_sum_t*) module;
opal_output(0, "In x86 sum short function");
}
/**
* Sum function for C int
*/
static void sum_int(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_sum_t *m = (module_sum_t*) module;
opal_output(0, "In x86 sum int function");
}
/**
* Sum function for C long
*/
static void sum_long(void *in, void *out, int *count,
ompi_datatype_t **type, ompi_op_base_module_t *module)
{
module_sum_t *m = (module_sum_t*) module;
opal_output(0, "In x86 sum long function");
}
/**
* Setup function for MPI_SUM. If we get here, we can assume that a)
* the hardware is present, b) the MPI thread scenario is what we
* want, and c) the SUM operation is supported. So this function's
* job is to create a module and fill in function pointers for the
* functions that this hardware supports.
*/
ompi_op_base_module_t *ompi_op_x86_setup_sum(ompi_op_t *op)
{
module_sum_t *module = OBJ_NEW(module_sum_t);
/* JMS It might be better to set function pointers here based on
the hardware (MMX*, SSE*) -- i.e., make a first-layer decision of
which will be used. I don't know if that's right, though,
because we might want to dispatch to different hardware based
on the size of the operation...? Just recording the idea
here... */
/* Commenting out everything for the moment, just so that we can
focus on the hardware detection piece first. */
#if 0
/* C float */
module->super.opm_fns[OMPI_OP_BASE_TYPE_FLOAT] = sum_float;
module->fallback_float = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_FLOAT];
module->fallback_float_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_FLOAT];
/* If you cache a fallback function, you *must* RETAIN (i.e.,
increase the refcount) its module so that the module knows that
it is being used and won't be freed/destructed. */
OBJ_RETAIN(module->fallback_float_module);
/* C short */
module->super.opm_fns[OMPI_OP_BASE_TYPE_SHORT] = sum_short;
module->fallback_short = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_SHORT];
module->fallback_short_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_SHORT];
/* If you cache a fallback function, you *must* RETAIN (i.e.,
increase the refcount) its module so that the module knows that
it is being used and won't be freed/destructed. */
OBJ_RETAIN(module->fallback_short_module);
/* C int */
module->super.opm_fns[OMPI_OP_BASE_TYPE_INT] = sum_int;
module->fallback_int = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_INT];
module->fallback_int_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_INT];
/* If you cache a fallback function, you *must* RETAIN (i.e.,
increase the refcount) its module so that the module knows that
it is being used and won't be freed/destructed. */
OBJ_RETAIN(module->fallback_int_module);
/* C long */
module->super.opm_fns[OMPI_OP_BASE_TYPE_LONG] = sum_long;
module->fallback_long = op->o_func.intrinsic.fns[OMPI_OP_BASE_TYPE_LONG];
module->fallback_long_module =
op->o_func.intrinsic.modules[OMPI_OP_BASE_TYPE_LONG];
/* If you cache a fallback function, you *must* RETAIN (i.e.,
increase the refcount) its module so that the module knows that
it is being used and won't be freed/destructed. */
OBJ_RETAIN(module->fallback_long_module);
#endif
return (ompi_op_base_module_t*) module;
}


@ -9,6 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -16,8 +17,6 @@
# $HEADER$
#
SUBDIRS = profile
#
@ -205,6 +204,7 @@ libmpi_c_mpi_la_SOURCES = \
recv_init.c \
recv.c \
reduce.c \
reduce_local.c \
reduce_scatter.c \
request_c2f.c \
request_f2c.c \


@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -32,30 +33,31 @@
static const char FUNC_NAME[] = "MPI_Op_create";
int MPI_Op_create(MPI_User_function *function, int commute,
MPI_Op *op)
int MPI_Op_create(MPI_User_function * function, int commute, MPI_Op * op)
{
int err = MPI_SUCCESS;
int err = MPI_SUCCESS;
/* Error checking */
/* Error checking */
if (MPI_PARAM_CHECK) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
if (NULL == op) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OP, FUNC_NAME);
} else if (NULL == function) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
if (MPI_PARAM_CHECK) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
if (NULL == op) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OP,
FUNC_NAME);
} else if (NULL == function) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
}
}
}
OPAL_CR_ENTER_LIBRARY();
OPAL_CR_ENTER_LIBRARY();
/* Create and cache the op. Sets a refcount of 1. */
/* Create and cache the op. Sets a refcount of 1. */
*op = ompi_op_create(OPAL_INT_TO_BOOL(commute),
(ompi_op_fortran_handler_fn_t*) function);
if (NULL == *op) {
err = MPI_ERR_INTERN;
}
OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME);
*op = ompi_op_create_user(OPAL_INT_TO_BOOL(commute),
(ompi_op_fortran_handler_fn_t *) function);
if (NULL == *op) {
err = MPI_ERR_INTERN;
}
OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, MPI_ERR_INTERN, FUNC_NAME);
}
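
For reference, a minimal, hypothetical caller of the user-defined path above (the program and my_int_sum are illustrative only, not part of this commit); MPI_Ops created this way go through ompi_op_create_user(), shown above:

    #include <mpi.h>
    #include <stdio.h>

    /* User-defined reduction: element-wise integer sum (assumes MPI_INT). */
    static void my_int_sum(void *in, void *inout, int *len, MPI_Datatype *dtype)
    {
        int i;
        int *a = (int *) in;
        int *b = (int *) inout;
        for (i = 0; i < *len; ++i) {
            b[i] += a[i];
        }
    }

    int main(int argc, char *argv[])
    {
        int rank, total;
        MPI_Op my_op;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Op_create(my_int_sum, 1 /* commute */, &my_op);
        MPI_Allreduce(&rank, &total, 1, MPI_INT, my_op, MPI_COMM_WORLD);
        printf("sum of ranks = %d\n", total);
        MPI_Op_free(&my_op);
        MPI_Finalize();
        return 0;
    }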


@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -17,7 +18,6 @@
# $HEADER$
#
#
# OMPI_PROFILING_DEFINES flag is enabled when we want our MPI_* symbols
# to be replaced by PMPI_*. In other words, this flag decides
@ -186,6 +186,7 @@ nodist_libmpi_c_pmpi_la_SOURCES = \
precv_init.c \
precv.c \
preduce.c \
preduce_local.c \
preduce_scatter.c \
prequest_c2f.c \
prequest_f2c.c \


@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -231,6 +232,7 @@
#define MPI_Recv_init PMPI_Recv_init
#define MPI_Recv PMPI_Recv
#define MPI_Reduce PMPI_Reduce
#define MPI_Reduce_local PMPI_Reduce_local
#define MPI_Reduce_scatter PMPI_Reduce_scatter
#define MPI_Register_datarep PMPI_Register_datarep
#define MPI_Request_c2f PMPI_Request_c2f

ompi/mpi/c/reduce_local.c

@ -0,0 +1,76 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include "ompi/mpi/c/bindings.h"
#include "ompi/datatype/datatype.h"
#include "ompi/op/op.h"
#include "ompi/memchecker.h"
#if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
#pragma weak MPI_Reduce_local = PMPI_Reduce_local
#endif
#if OMPI_PROFILING_DEFINES
#include "ompi/mpi/c/profile/defines.h"
#endif
static const char FUNC_NAME[] = "MPI_Reduce_local";
int MPI_Reduce_local(void *inbuf, void *inoutbuf, int count,
MPI_Datatype datatype, MPI_Op op)
{
int err = MPI_SUCCESS;
MEMCHECKER(
memchecker_datatype(datatype);
);
if (MPI_PARAM_CHECK) {
char *msg;
err = MPI_SUCCESS;
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
if (MPI_OP_NULL == op || NULL == op) {
err = MPI_ERR_OP;
} else if (!ompi_op_is_valid(op, datatype, &msg, FUNC_NAME)) {
int ret = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OP, msg);
free(msg);
return ret;
} else {
OMPI_CHECK_DATATYPE_FOR_SEND(err, datatype, count);
}
}
/* If the count is 0, just return */
if (0 == count) {
return MPI_SUCCESS;
}
OPAL_CR_ENTER_LIBRARY();
/* Invoke the op component to perform the back-end operation */
OBJ_RETAIN(op);
ompi_op_reduce(op, inbuf, inoutbuf, count, datatype);
OBJ_RELEASE(op);
OMPI_ERRHANDLER_RETURN(err, MPI_COMM_WORLD, err, FUNC_NAME);
}
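
A minimal, hypothetical serial test of MPI_Reduce_local (illustrative only, not part of this commit) could look like:

    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char *argv[])
    {
        int i;
        int in[4]    = { 1, 2, 3, 4 };
        int inout[4] = { 10, 20, 30, 40 };

        MPI_Init(&argc, &argv);
        /* inout[i] = in[i] + inout[i]; no communication is involved */
        MPI_Reduce_local(in, inout, 4, MPI_INT, MPI_SUM);
        for (i = 0; i < 4; ++i) {
            printf("inout[%d] = %d\n", i, inout[i]);   /* 11 22 33 44 */
        }
        MPI_Finalize();
        return 0;
    }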


@ -10,7 +10,7 @@
// University of Stuttgart. All rights reserved.
// Copyright (c) 2004-2005 The Regents of the University of California.
// All rights reserved.
// Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
// Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
// $COPYRIGHT$
//
// Additional copyrights may follow
@ -47,6 +47,9 @@ public:
virtual void Init(User_function *func, bool commute);
virtual void Free();
virtual void Reduce_local(const void *inbuf, void *inoutbuf, int count,
const MPI::Datatype& datatype) const;
#if ! 0 /* OMPI_ENABLE_MPI_PROFILING */
protected:
MPI_Op mpi_op;


@ -10,7 +10,7 @@
// University of Stuttgart. All rights reserved.
// Copyright (c) 2004-2005 The Regents of the University of California.
// All rights reserved.
// Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
// Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
// $COPYRIGHT$
//
// Additional copyrights may follow
@ -34,18 +34,18 @@ MPI::Op::~Op() { }
inline
MPI::Op& MPI::Op::operator=(const MPI::Op& op) {
pmpi_op = op.pmpi_op; return *this;
pmpi_op = op.pmpi_op; return *this;
}
// comparison
inline bool
MPI::Op::operator== (const MPI::Op &a) {
return (bool)(pmpi_op == a.pmpi_op);
return (bool)(pmpi_op == a.pmpi_op);
}
inline bool
MPI::Op::operator!= (const MPI::Op &a) {
return (bool)!(*this == a);
return (bool)!(*this == a);
}
// inter-language operability
@ -76,15 +76,15 @@ inline
MPI::Op::~Op()
{
#if 0
mpi_op = MPI_OP_NULL;
op_user_function = 0;
mpi_op = MPI_OP_NULL;
op_user_function = 0;
#endif
}
inline MPI::Op&
MPI::Op::operator=(const MPI::Op& op) {
mpi_op = op.mpi_op;
return *this;
mpi_op = op.mpi_op;
return *this;
}
// comparison
@ -127,5 +127,14 @@ MPI::Op::Init(MPI::User_function *func, bool commute)
inline void
MPI::Op::Free()
{
(void)MPI_Op_free(&mpi_op);
(void)MPI_Op_free(&mpi_op);
}
inline void
MPI::Op::Reduce_local(const void *inbuf, void *inoutbuf, int count,
const MPI::Datatype& datatype) const
{
(void)MPI_Reduce_local(const_cast<void*>(inbuf), inoutbuf, count,
datatype, mpi_op);
}


@ -9,7 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -211,6 +211,7 @@ libmpi_f77_la_SOURCES += \
recv_f.c \
recv_init_f.c \
reduce_f.c \
reduce_local_f.c \
reduce_scatter_f.c \
request_free_f.c \
request_get_status_f.c \


@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow


@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -178,6 +179,7 @@ nodist_libmpi_f77_pmpi_la_SOURCES = \
precv_f.c \
precv_init_f.c \
preduce_f.c \
preduce_local_f.c \
preduce_scatter_f.c \
prequest_free_f.c \
prequest_get_status_f.c \


@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -251,6 +251,7 @@ PN(void, mpi_query_thread, MPI_QUERY_THREAD, (MPI_Fint *provided, MPI_Fint *ierr
PN(void, mpi_recv_init, MPI_RECV_INIT, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *request, MPI_Fint *ierr));
PN(void, mpi_recv, MPI_RECV, (char *buf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *source, MPI_Fint *tag, MPI_Fint *comm, MPI_Fint *status, MPI_Fint *ierr));
PN(void, mpi_reduce, MPI_REDUCE, (char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *root, MPI_Fint *comm, MPI_Fint *ierr));
PN(void, mpi_reduce_local, MPI_REDUCE_LOCAL, (char *inbuf, char *inoutbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr));
PN(void, mpi_reduce_scatter, MPI_REDUCE_SCATTER, (char *sendbuf, char *recvbuf, MPI_Fint *recvcounts, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *comm, MPI_Fint *ierr));
PN(void, mpi_register_datarep, MPI_REGISTER_DATAREP, (char *datarep, ompi_mpi2_fortran_datarep_conversion_fn_t *read_conversion_fn, ompi_mpi2_fortran_datarep_conversion_fn_t *write_conversion_fn, ompi_mpi2_fortran_datarep_extent_fn_t *dtype_file_extent_fn, MPI_Aint *extra_state, MPI_Fint *ierr, int datarep_len));
PN(void, mpi_request_free, MPI_REQUEST_FREE, (MPI_Fint *request, MPI_Fint *ierr));


@ -0,0 +1,77 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mpi/f77/bindings.h"
#include "ompi/mpi/f77/constants.h"
#if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILE_LAYER
#pragma weak PMPI_REDUCE_LOCAL = mpi_reduce_local_f
#pragma weak pmpi_reduce_local = mpi_reduce_local_f
#pragma weak pmpi_reduce_local_ = mpi_reduce_local_f
#pragma weak pmpi_reduce_local__ = mpi_reduce_local_f
#elif OMPI_PROFILE_LAYER
OMPI_GENERATE_F77_BINDINGS (PMPI_REDUCE_LOCAL,
pmpi_reduce_local,
pmpi_reduce_local_,
pmpi_reduce_local__,
pmpi_reduce_local_f,
(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr),
(sendbuf, recvbuf, count, datatype, op, ierr) )
#endif
#if OMPI_HAVE_WEAK_SYMBOLS
#pragma weak MPI_REDUCE_LOCAL = mpi_reduce_local_f
#pragma weak mpi_reduce_local = mpi_reduce_local_f
#pragma weak mpi_reduce_local_ = mpi_reduce_local_f
#pragma weak mpi_reduce_local__ = mpi_reduce_local_f
#endif
#if ! OMPI_HAVE_WEAK_SYMBOLS && ! OMPI_PROFILE_LAYER
OMPI_GENERATE_F77_BINDINGS (MPI_REDUCE_LOCAL,
mpi_reduce_local,
mpi_reduce_local_,
mpi_reduce_local__,
mpi_reduce_local_f,
(char *sendbuf, char *recvbuf, MPI_Fint *count, MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr),
(sendbuf, recvbuf, count, datatype, op, ierr) )
#endif
#if OMPI_PROFILE_LAYER && ! OMPI_HAVE_WEAK_SYMBOLS
#include "ompi/mpi/f77/profile/defines.h"
#endif
void mpi_reduce_local_f(char *inbuf, char *inoutbuf, MPI_Fint *count,
MPI_Fint *datatype, MPI_Fint *op, MPI_Fint *ierr)
{
MPI_Datatype c_type;
MPI_Op c_op;
c_type = MPI_Type_f2c(*datatype);
c_op = MPI_Op_f2c(*op);
inbuf = OMPI_F2C_BOTTOM(inbuf);
inoutbuf = OMPI_F2C_BOTTOM(inoutbuf);
*ierr = OMPI_INT_2_FINT(MPI_Reduce_local(inbuf, inoutbuf,
OMPI_FINT_2_INT(*count),
c_type, c_op));
}


@ -5,7 +5,7 @@
# Corporation. All rights reserved.
# Copyright (c) 2004-2006 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -6431,6 +6431,62 @@ end MPI_Reduce
#------------------------------------------------------------------------
output_183_local() {
if test "$output" = "0"; then
return 0
fi
procedure=$1
rank=$2
type=$4
proc="$1$2D$3"
cat <<EOF
subroutine ${proc}(inbuf, inout, count, datatype, op, &
ierr)
${type}, intent(in) :: inbuf
${type}, intent(out) :: inout
integer, intent(in) :: count
integer, intent(in) :: datatype
integer, intent(in) :: op
integer, intent(out) :: ierr
end subroutine ${proc}
EOF
}
start MPI_Reduce_local large
for rank in $allranks
do
case "$rank" in 0) dim='' ; esac
case "$rank" in 1) dim=', dimension(*)' ; esac
case "$rank" in 2) dim=', dimension(1,*)' ; esac
case "$rank" in 3) dim=', dimension(1,1,*)' ; esac
case "$rank" in 4) dim=', dimension(1,1,1,*)' ; esac
case "$rank" in 5) dim=', dimension(1,1,1,1,*)' ; esac
case "$rank" in 6) dim=', dimension(1,1,1,1,1,*)' ; esac
case "$rank" in 7) dim=', dimension(1,1,1,1,1,1,*)' ; esac
output_183_local MPI_Reduce_local ${rank} CH "character${dim}"
output_183_local MPI_Reduce_local ${rank} L "logical${dim}"
for kind in $ikinds
do
output_183_local MPI_Reduce_local ${rank} I${kind} "integer*${kind}${dim}"
done
for kind in $rkinds
do
output_183_local MPI_Reduce_local ${rank} R${kind} "real*${kind}${dim}"
done
for kind in $ckinds
do
output_183_local MPI_Reduce_local ${rank} C${kind} "complex*${kind}${dim}"
done
done
end MPI_Reduce_local
#------------------------------------------------------------------------
output_184() {
if test "$output" = "0"; then
return 0


@ -0,0 +1,276 @@
.\"Copyright 2006-2008 Sun Microsystems, Inc.
.\"Copyright 2009 Cisco Systems, Inc. All rights reserved.
.\" Copyright (c) 1996 Thinking Machines Corporation
.TH MPI_Reduce_local 3 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
.SH NAME
\fBMPI_Reduce_local\fP \- Perform a local reduction
.SH SYNTAX
.ft R
.SH C Syntax
.nf
#include <mpi.h>
int MPI_Reduce_local(void *\fIinbuf\fP, void *\fIinoutbuf\fP, int\fI count\fP,
MPI_Datatype\fI datatype\fP, MPI_Op\fI op\fP)
.SH Fortran Syntax
.nf
INCLUDE 'mpif.h'
MPI_REDUCE_LOCAL(\fIINBUF, INOUTBUF, COUNT, DATATYPE, OP, IERROR\fP)
<type> \fIINBUF(*), INOUTBUF(*)\fP
INTEGER \fICOUNT, DATATYPE, OP, IERROR\fP
.SH C++ Syntax
.nf
#include <mpi.h>
void MPI::Op::Reduce_local(const void* \fIinbuf\fP, void* \fIinoutbuf\fP,
int \fIcount\fP, const MPI::Datatype& \fIdatatype\fP) const
.SH INPUT PARAMETERS
.ft R
.TP 1i
inbuf
Address of input buffer (choice).
.TP 1i
count
Number of elements in input buffer (integer).
.TP 1i
datatype
Data type of elements of input buffer (handle).
.TP 1i
op
Reduce operation (handle).
.SH OUTPUT PARAMETERS
.ft R
.TP 1i
inoutbuf
Address of in/out buffer (choice).
.ft R
.TP 1i
IERROR
Fortran only: Error status (integer).
.SH DESCRIPTION
.ft R
.I The MPI_Reduce_local function is proposed for MPI-2.2 and (as of 10 Jan 2009) has not yet been ratified. Use at your own risk. See https://svn.mpi-forum.org/trac/mpi-forum-web/ticket/24.
.sp
The global reduce functions (MPI_Reduce, MPI_Op_create, MPI_Op_free, MPI_Allreduce, MPI_Reduce_scatter, MPI_Scan) perform a global reduce operation (such as sum, max, logical AND, etc.) across all the members of a group. The reduction operation can be either one of a predefined list of operations, or a user-defined operation. The global reduction functions come in several flavors: a reduce that returns the result of the reduction at one node, an all-reduce that returns this result at all nodes, and a scan (parallel prefix) operation. In addition, a reduce-scatter operation combines the functionality of a reduce and a scatter operation.
.sp
MPI_Reduce_local combines the elements provided in the input and input/output buffers of the local process, using the operation op, and returns the combined value in the input/output buffer. The input buffer is defined by the arguments inbuf, count, and datatype; the output buffer is defined by the arguments inoutbuf, count, and datatype; both have the same number of elements, with the same type. The routine is a local call. The process can provide one element, or a sequence of elements, in which case the combine operation is executed element-wise on each entry of the sequence. For example, if the operation is MPI_MAX and the input buffer contains two elements that are floating-point numbers (count = 2 and datatype = MPI_FLOAT), then inoutbuf(1) = max(inbuf(1), inoutbuf(1)) and inoutbuf(2) = max(inbuf(2), inoutbuf(2)).
.sp
.SH USE OF IN-PLACE OPTION
The use of MPI_IN_PLACE is disallowed with MPI_Reduce_local.
.sp
.SH PREDEFINED REDUCE OPERATIONS
.sp
The set of predefined operations provided by MPI is listed below (Predefined Reduce Operations). That section also enumerates the datatypes each operation can be applied to. In addition, users may define their own operations that can be overloaded to operate on several datatypes, either basic or derived. This is further explained in the description of the user-defined operations (see the man pages for MPI_Op_create and MPI_Op_free).
.sp
The operation op is always assumed to be associative. All predefined operations are also assumed to be commutative. Users may define operations that are assumed to be associative, but not commutative. The ``canonical'' evaluation order of a reduction is determined by the ranks of the processes in the group. However, the implementation can take advantage of associativity, or associativity and commutativity, in order to change the order of evaluation. This may change the result of the reduction for operations that are not strictly associative and commutative, such as floating point addition.
.sp
Predefined operators work only with the MPI types listed below (Predefined Reduce Operations, and the section MINLOC and MAXLOC, below). User-defined operators may operate on general, derived datatypes. In this case, each argument that the reduce operation is applied to is one element described by such a datatype, which may contain several basic values. This is further explained in Section 4.9.4 of the MPI Standard, "User-Defined Operations."
The following predefined operations are supplied for MPI_Reduce_local and the related functions MPI_Reduce, MPI_Allreduce, MPI_Reduce_scatter, and MPI_Scan. These operations are invoked by placing the following in op:
.sp
.nf
Name Meaning
--------- --------------------
MPI_MAX maximum
MPI_MIN minimum
MPI_SUM sum
MPI_PROD product
MPI_LAND logical and
MPI_BAND bit-wise and
MPI_LOR logical or
MPI_BOR bit-wise or
MPI_LXOR logical xor
MPI_BXOR bit-wise xor
MPI_MAXLOC max value and location
MPI_MINLOC min value and location
.fi
.sp
The two operations MPI_MINLOC and MPI_MAXLOC are discussed separately below (MINLOC and MAXLOC). For the other predefined operations, we enumerate below the allowed combinations of op and datatype arguments. First, define groups of MPI basic datatypes in the following way:
.sp
.nf
C integer: MPI_INT, MPI_LONG, MPI_SHORT,
MPI_UNSIGNED_SHORT, MPI_UNSIGNED,
MPI_UNSIGNED_LONG
Fortran integer: MPI_INTEGER
Floating-point: MPI_FLOAT, MPI_DOUBLE, MPI_REAL,
MPI_DOUBLE_PRECISION, MPI_LONG_DOUBLE
Logical: MPI_LOGICAL
Complex: MPI_COMPLEX
Byte: MPI_BYTE
.fi
.sp
The valid datatypes for each op are specified below.
.sp
.nf
Op Allowed Types
---------------- ---------------------------
MPI_MAX, MPI_MIN C integer, Fortran integer,
floating-point
MPI_SUM, MPI_PROD C integer, Fortran integer,
floating-point, complex
MPI_LAND, MPI_LOR, C integer, logical
MPI_LXOR
MPI_BAND, MPI_BOR, C integer, Fortran integer, byte
MPI_BXOR
.fi
.sp
.SH MINLOC AND MAXLOC
.ft R
The operator MPI_MINLOC is used to compute a global minimum and also an index attached to the minimum value. MPI_MAXLOC similarly computes a global maximum and index. One application of these is to compute a global minimum (maximum) and the rank of the process containing this value.
.sp
The operation that defines MPI_MAXLOC is
.sp
.nf
( u ) ( v ) ( w )
( ) o ( ) = ( )
( i ) ( j ) ( k )
where
w = max(u, v)
and
( i if u > v
(
k = ( min(i, j) if u = v
(
( j if u < v)
MPI_MINLOC is defined similarly:
( u ) ( v ) ( w )
( ) o ( ) = ( )
( i ) ( j ) ( k )
where
     w = min(u, v)
and
( i if u < v
(
k = ( min(i, j) if u = v
(
( j if u > v)
.fi
.sp
Both operations are associative and commutative. Note that if MPI_MAXLOC is
applied to reduce a sequence of pairs (u(0), 0), (u(1), 1),\ ..., (u(n-1),
n-1), then the value returned is (u, r), where u = max(i) u(i) and r is
the index of the first global maximum in the sequence. Thus, if each
process supplies a value and its rank within the group, then a reduce
operation with op = MPI_MAXLOC will return the maximum value and the rank
of the first process with that value. Similarly, MPI_MINLOC can be used to
return a minimum and its index. More generally, MPI_MINLOC computes a
lexicographic minimum, where elements are ordered according to the first
component of each pair, and ties are resolved according to the second
component.
.sp
The reduce operation is defined to operate on arguments that consist of a
pair: value and index. For both Fortran and C, types are provided to
describe the pair. The potentially mixed-type nature of such arguments is a
problem in Fortran. The problem is circumvented, for Fortran, by having the
MPI-provided type consist of a pair of the same type as value, and coercing
the index to this type also. In C, the MPI-provided pair type has distinct
types and the index is an int.
.sp
In order to use MPI_MINLOC and MPI_MAXLOC in a reduce operation, one must
provide a datatype argument that represents a pair (value and index). MPI
provides nine such predefined datatypes. The operations MPI_MAXLOC and
MPI_MINLOC can be used with each of the following datatypes:
.sp
.nf
Fortran:
Name Description
MPI_2REAL pair of REALs
MPI_2DOUBLE_PRECISION pair of DOUBLE-PRECISION variables
MPI_2INTEGER pair of INTEGERs
C:
Name Description
MPI_FLOAT_INT float and int
MPI_DOUBLE_INT double and int
MPI_LONG_INT long and int
MPI_2INT pair of ints
MPI_SHORT_INT short and int
MPI_LONG_DOUBLE_INT long double and int
.fi
.sp
The data type MPI_2REAL is equivalent to:
.nf
MPI_TYPE_CONTIGUOUS(2, MPI_REAL, MPI_2REAL)
.fi
.sp
Similar statements apply for MPI_2INTEGER, MPI_2DOUBLE_PRECISION, and
MPI_2INT.
.sp
The datatype MPI_FLOAT_INT is as if defined by the following sequence of
instructions.
.sp
.nf
type[0] = MPI_FLOAT
type[1] = MPI_INT
disp[0] = 0
disp[1] = sizeof(float)
block[0] = 1
block[1] = 1
MPI_TYPE_STRUCT(2, block, disp, type, MPI_FLOAT_INT)
.fi
.sp
Similar statements apply for MPI_LONG_INT and MPI_DOUBLE_INT.
.sp
All MPI objects (e.g., MPI_Datatype, MPI_Comm) are of type INTEGER in Fortran.
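.sp
As an illustrative sketch (the values are arbitrary), MPI_MAXLOC might be used with MPI_Reduce_local on MPI_DOUBLE_INT pairs as follows:
.sp
.nf
    struct { double value; int index; } in[2], inout[2];

    in[0].value = 3.0;     in[0].index = 7;
    in[1].value = 1.0;     in[1].index = 2;
    inout[0].value = 2.0;  inout[0].index = 5;
    inout[1].value = 4.0;  inout[1].index = 9;

    /* After the call, each inout[i] holds the larger value and its
       index (ties keep the smaller index) */
    MPI_Reduce_local(in, inout, 2, MPI_DOUBLE_INT, MPI_MAXLOC);
.fi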
.SH NOTES ON COLLECTIVE OPERATIONS
The reduction operators (
.I MPI_Op
) do not return an error value. As a result,
if the functions detect an error, all they can do is either call
.I MPI_Abort
or silently skip the problem. Thus, if you change the error handler from
.I MPI_ERRORS_ARE_FATAL
to something else, for example,
.I MPI_ERRORS_RETURN
,
then no error may be indicated.
The reason for this is the performance problems in ensuring that
all collective routines return the same error value.
.SH ERRORS
Almost all MPI routines return an error value; C routines as the value of the function and Fortran routines in the last argument. C++ functions do not return errors. If the default error handler is set to MPI::ERRORS_THROW_EXCEPTIONS, then on error the C++ exception mechanism will be used to throw an MPI::Exception object.
.sp
Before the error value is returned, the current MPI error handler is
called. By default, this error handler aborts the MPI job, except for I/O function errors. The error handler may be changed with MPI_Comm_set_errhandler; the predefined error handler MPI_ERRORS_RETURN may be used to cause error values to be returned. Note that MPI does not guarantee that an MPI program can continue past an error.
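.sp
For example (an illustrative sketch; MPI does not guarantee that execution can safely continue after an error), error values can be returned to the caller instead of aborting by installing the predefined error handler before the call:
.sp
.nf
    double in = 1.0, acc = 0.0;

    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
    if (MPI_SUCCESS != MPI_Reduce_local(&in, &acc, 1,
                                        MPI_DOUBLE, MPI_SUM)) {
        /* handle or report the error here */
    }
.fi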
.SH SEE ALSO
.ft R
.sp
MPI_Allreduce
.br
MPI_Reduce
.br
MPI_Reduce_scatter
.br
MPI_Scan
.br
MPI_Op_create
.br
MPI_Op_free


@ -1,5 +1,5 @@
# -*- makefile -*-
# Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
# $COPYRIGHT$
#
@ -216,6 +216,7 @@ mpi_api_man_pages = \
mpi/man/man3/MPI_Recv.3 \
mpi/man/man3/MPI_Recv_init.3 \
mpi/man/man3/MPI_Reduce.3 \
mpi/man/man3/MPI_Reduce_local.3 \
mpi/man/man3/MPI_Reduce_scatter.3 \
mpi/man/man3/MPI_Register_datarep.3 \
mpi/man/man3/MPI_Request_c2f.3 \


@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -17,12 +18,9 @@
# $HEADER$
#
# This makefile.am does not stand on its own - it is included from ompi/Makefile.am
# This makefile.am does not stand on its own - it is included from
# ompi/Makefile.am
headers += \
op/op.h \
op/op_predefined.h
headers += op/op.h
libmpi_la_SOURCES += \
op/op.c \
op/op_predefined.c
libmpi_la_SOURCES += op/op.c

File diff suppressed because it is too large


@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -29,214 +29,46 @@
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/datatype/datatype.h"
#include "opal/class/opal_object.h"
#include "ompi/mpi/f77/fint_2_int.h"
#include <stdio.h>
#include "mpi.h"
#include "opal/class/opal_object.h"
#include "ompi/datatype/datatype.h"
#include "ompi/mpi/f77/fint_2_int.h"
#include "ompi/mca/op/op.h"
BEGIN_C_DECLS
/**
* Fortran handles; must be [manually set to be] equivalent to the
* values in mpif.h.
*/
enum {
/** Corresponds to Fortran MPI_OP_NULL */
OMPI_OP_FORTRAN_NULL = 0,
/** Corresponds to Fortran MPI_MAX */
OMPI_OP_FORTRAN_MAX,
/** Corresponds to Fortran MPI_MIN */
OMPI_OP_FORTRAN_MIN,
/** Corresponds to Fortran MPI_SUM */
OMPI_OP_FORTRAN_SUM,
/** Corresponds to Fortran MPI_PROD */
OMPI_OP_FORTRAN_PROD,
/** Corresponds to Fortran MPI_LAND */
OMPI_OP_FORTRAN_LAND,
/** Corresponds to Fortran MPI_BAND */
OMPI_OP_FORTRAN_BAND,
/** Corresponds to Fortran MPI_LOR */
OMPI_OP_FORTRAN_LOR,
/** Corresponds to Fortran MPI_BOR */
OMPI_OP_FORTRAN_BOR,
/** Corresponds to Fortran MPI_LXOR */
OMPI_OP_FORTRAN_LXOR,
/** Corresponds to Fortran MPI_BXOR */
OMPI_OP_FORTRAN_BXOR,
/** Corresponds to Fortran MPI_MAXLOC */
OMPI_OP_FORTRAN_MAXLOC,
/** Corresponds to Fortran MPI_MINLOC */
OMPI_OP_FORTRAN_MINLOC,
/** Corresponds to Fortran MPI_REPLACE */
OMPI_OP_FORTRAN_REPLACE,
/** Maximum value */
OMPI_OP_FORTRAN_MAX_TYPE
};
/**
* Corresponding to the types that we can reduce over. See
* MPI-1:4.9.2, p114-115 and
* MPI-2:4.15, p76-77
*/
enum {
/** C integer: unsigned char */
OMPI_OP_TYPE_UNSIGNED_CHAR,
/** C integer: signed char */
OMPI_OP_TYPE_SIGNED_CHAR,
/** C integer: int */
OMPI_OP_TYPE_INT,
/** C integer: long */
OMPI_OP_TYPE_LONG,
/** C integer: short */
OMPI_OP_TYPE_SHORT,
/** C integer: unsigned short */
OMPI_OP_TYPE_UNSIGNED_SHORT,
/** C integer: unsigned */
OMPI_OP_TYPE_UNSIGNED,
/** C integer: unsigned long */
OMPI_OP_TYPE_UNSIGNED_LONG,
/** C integer: long long int (optional) */
OMPI_OP_TYPE_LONG_LONG_INT,
/** C integer: unsigned long long (optional) */
OMPI_OP_TYPE_UNSIGNED_LONG_LONG,
/** Fortran integer */
OMPI_OP_TYPE_INTEGER,
/** Fortran integer*1 */
OMPI_OP_TYPE_INTEGER1,
/** Fortran integer*2 */
OMPI_OP_TYPE_INTEGER2,
/** Fortran integer*4 */
OMPI_OP_TYPE_INTEGER4,
/** Fortran integer*8 */
OMPI_OP_TYPE_INTEGER8,
/** Fortran integer*16 */
OMPI_OP_TYPE_INTEGER16,
/** Floating point: float */
OMPI_OP_TYPE_FLOAT,
/** Floating point: double */
OMPI_OP_TYPE_DOUBLE,
/** Floating point: real */
OMPI_OP_TYPE_REAL,
/** Floating point: real*2 */
OMPI_OP_TYPE_REAL2,
/** Floating point: real*4 */
OMPI_OP_TYPE_REAL4,
/** Floating point: real*8 */
OMPI_OP_TYPE_REAL8,
/** Floating point: real*16 */
OMPI_OP_TYPE_REAL16,
/** Floating point: double precision */
OMPI_OP_TYPE_DOUBLE_PRECISION,
/** Floating point: long double */
OMPI_OP_TYPE_LONG_DOUBLE,
/** Logical */
OMPI_OP_TYPE_LOGICAL,
/** Bool */
OMPI_OP_TYPE_BOOL,
/** Complex */
OMPI_OP_TYPE_COMPLEX,
/** Double complex */
OMPI_OP_TYPE_DOUBLE_COMPLEX,
/** Complex8 */
OMPI_OP_TYPE_COMPLEX8,
/** Complex16 */
OMPI_OP_TYPE_COMPLEX16,
/** Complex32 */
OMPI_OP_TYPE_COMPLEX32,
/** Byte */
OMPI_OP_TYPE_BYTE,
/** 2 location Fortran: 2 real */
OMPI_OP_TYPE_2REAL,
/** 2 location Fortran: 2 double precision */
OMPI_OP_TYPE_2DOUBLE_PRECISION,
/** 2 location Fortran: 2 integer */
OMPI_OP_TYPE_2INTEGER,
/** 2 location C: float int */
OMPI_OP_TYPE_FLOAT_INT,
/** 2 location C: double int */
OMPI_OP_TYPE_DOUBLE_INT,
/** 2 location C: long int */
OMPI_OP_TYPE_LONG_INT,
/** 2 location C: int int */
OMPI_OP_TYPE_2INT,
/** 2 location C: short int */
OMPI_OP_TYPE_SHORT_INT,
/** 2 location C: long double int */
OMPI_OP_TYPE_LONG_DOUBLE_INT,
/** 2 location C: wchar_t */
OMPI_OP_TYPE_WCHAR,
/** Maximum type */
OMPI_OP_TYPE_MAX
};
/**
* Typedef for C op functions.
* Typedef for C op functions for user-defined MPI_Ops.
*
* We don't use MPI_User_function because this would create a
* confusing dependency loop between this file and mpi.h. So this is
* repeated code, but it's better this way (and this typedef will
* never change, so there's not much of a maintenance worry).
*/
typedef void (ompi_op_c_handler_fn_t) (void *, void *, int *,
MPI_Datatype *);
/*
* Three buffer ( two input and one output) function prototype
*/
typedef void (ompi_op_3buff_c_handler_fn_t) (void *restrict,
void *restrict,
void *restrict, int *,
MPI_Datatype *);
typedef void (ompi_op_c_handler_fn_t)(void *, void *, int *,
struct ompi_datatype_t **);
/**
* Typedef for fortran op functions.
* Typedef for fortran user-defined MPI_Ops.
*/
typedef void (ompi_op_fortran_handler_fn_t) (void *, void *,
MPI_Fint *, MPI_Fint *);
/*
* Three buffer (2 input one output) function prototype
*/
typedef void (ompi_op_3buff_fortran_handler_fn_t) (void *restrict,
void *restrict,
void *restrict,
MPI_Fint *, MPI_Fint *);
typedef void (ompi_op_fortran_handler_fn_t)(void *, void *,
MPI_Fint *, MPI_Fint *);
/**
* Typedef for C++ op functions intercept.
* Typedef for C++ op functions intercept (used for user-defined
* MPI::Ops).
*
* See the lengthy explanation for why this is different than the C
* intercept in ompi/mpi/cxx/intercepts.cc in the
* ompi_mpi_cxx_op_intercept() function.
*/
typedef void (ompi_op_cxx_handler_fn_t) (void *, void *, int *,
MPI_Datatype *,
MPI_User_function * op);
/*
* Three buffer (two input, one output) function prototype
*/
typedef void (ompi_op_3buff_cxx_handler_fn_t) (void *restrict,
void *restrict,
void *restrict, int *,
MPI_Datatype *,
MPI_User_function * op);
typedef void (ompi_op_cxx_handler_fn_t)(void *, void *, int *,
struct ompi_datatype_t **,
MPI_User_function * op);
/*
* Flags for MPI_Op
@ -274,36 +106,37 @@ struct ompi_op_t {
/** Flags about the op */
uint32_t o_flags;
/** Array of function pointers, indexed on the operation type.
For non-intrinsice MPI_Op's, only the 0th element will be
meaningful. */
union {
/** C handler function pointer */
ompi_op_c_handler_fn_t *c_fn;
/** Fortran handler function pointer */
ompi_op_fortran_handler_fn_t *fort_fn;
/** C++ intercept function pointer -- see lengthy comment in
ompi/mpi/cxx/intercepts.cc::ompi_mpi_cxx_op_intercept() for
an explanation */
ompi_op_cxx_handler_fn_t *cxx_intercept_fn;
} o_func[OMPI_OP_TYPE_MAX];
/** Index in Fortran <-> C translation array */
int o_f_to_c_index;
/** Array of three buffer function pointers, indexed on the
operation type. For non-intrinsice MPI_Op's, only the 0th
element will be meaningful. */
/** Union holding (2-buffer functions):
1. Function pointers for all supported datatypes when this op
is intrinsic
2. Function pointers for when this op is user-defined (only
need one function pointer for this; we call it for *all*
datatypes, even intrinsics)
*/
union {
/** C handler function pointer */
ompi_op_3buff_c_handler_fn_t *c_fn;
/** Fortran handler function pointer */
ompi_op_3buff_fortran_handler_fn_t *fort_fn;
/** C++ intercept function pointer -- see lengthy comment in
ompi/mpi/cxx/intercepts.cc::ompi_mpi_cxx_op_intercept() for
an explanation */
ompi_op_3buff_cxx_handler_fn_t *cxx_intercept_fn;
} o_3buff_func[OMPI_OP_TYPE_MAX];
/** Function/module pointers for intrinsic ops */
ompi_op_base_op_fns_t intrinsic;
/** C handler function pointer */
ompi_op_c_handler_fn_t *c_fn;
/** Fortran handler function pointer */
ompi_op_fortran_handler_fn_t *fort_fn;
/** C++ intercept function data -- see lengthy comment in
ompi/mpi/cxx/intercepts.cc::ompi_mpi_cxx_op_intercept() for
an explanation */
struct {
/* The user's function (it's the wrong type, but that's ok) */
ompi_op_c_handler_fn_t *user_fn;
/* The OMPI C++ callback/intercept function */
ompi_op_cxx_handler_fn_t *intercept_fn;
} cxx_data;
} o_func;
/** 3-buffer functions, which is only for intrinsic ops. No need
for the C/C++/Fortran user-defined functions. */
ompi_op_base_op_3buff_fns_t o_3buff_intrinsic;
};
/**
@ -442,7 +275,8 @@ int ompi_op_init(void);
int ompi_op_finalize(void);
/**
* Create a ompi_op_t
* Create a ompi_op_t with a user-defined callback (vs. creating an
* intrinsic ompi_op_t).
*
* @param commute Boolean indicating whether the operation is
* communative or not
@ -452,8 +286,8 @@ int ompi_op_finalize(void);
* created and returned
*
* This function is called as the back-end of all the MPI_OP_CREATE
* functions. It creates a new ompi_op_t object, initializes it to
* the correct object type, and sets the callback function on it.
* function. It creates a new ompi_op_t object, initializes it to the
* correct object type, and sets the callback function on it.
*
* The type of the function pointer is (arbitrarily) the fortran
* function handler type. Since this function has to accept 2
@ -467,8 +301,8 @@ int ompi_op_finalize(void);
* wrapper for MPI_OP_CREATE is expected to reset this flag to true
* manually.
*/
ompi_op_t *ompi_op_create(bool commute,
ompi_op_fortran_handler_fn_t * func);
ompi_op_t *ompi_op_create_user(bool commute,
ompi_op_fortran_handler_fn_t func);
/**
* Mark an MPI_Op as holding a C++ callback function, and cache
@ -558,11 +392,8 @@ static inline bool ompi_op_is_valid(ompi_op_t * op, ompi_datatype_t * ddt,
if (ompi_op_is_intrinsic(op)) {
if (ompi_ddt_is_predefined(ddt)) {
/* Intrinsic ddt on intrinsic op */
if ((-1 == ompi_op_ddt_map[ddt->id] ||
(0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC) &&
NULL == op->o_func[ompi_op_ddt_map[ddt->id]].fort_fn) ||
(0 == (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC) &&
NULL == op->o_func[ompi_op_ddt_map[ddt->id]].c_fn))) {
if (-1 == ompi_op_ddt_map[ddt->id] ||
NULL == op->o_func.intrinsic.fns[ompi_op_ddt_map[ddt->id]]) {
asprintf(msg,
"%s: the reduction operation %s is not defined on the %s datatype",
func, op->o_name, ddt->name);
@ -627,29 +458,31 @@ static inline void ompi_op_reduce(ompi_op_t * op, void *source,
/*
* Call the reduction function. Two dimensions: a) if both the op
* and the datatype are intrinsic, we have a series of predefined
* functions for each datatype, b) if the op has a fortran callback
* function or not.
* functions for each datatype (that are *only* in C -- not
* Fortran or C++!), or b) the op is user-defined, and therefore
* we have to check whether to invoke the callback with the C,
* C++, or Fortran callback signature (see lengthy description of
* the C++ callback in ompi/mpi/cxx/intercepts.cc).
*
* NOTE: We assume here that we will get a valid result back from
* the ompi_op_ddt_map[] (and not -1) -- if we do, then the
* parameter check in the top-level MPI function should have caught
* it. If we get -1 because the top-level parameter check is turned
* NOTE: We *assume* the following:
*
* 1. If the op is intrinsic, the op is pre-defined
* 2. That we will get a valid result back from the
* ompi_op_ddt_map[] (and not -1).
*
* Failures in these assumptions should have been caught by the
* upper layer (i.e., they should never have called this
* function). If either of these assumptions are wrong, it's
* likely that the MPI API function parameter checking is turned
* off, then it's an erroneous program and it's the user's fault.
* :-)
*/
if (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC) &&
ompi_ddt_is_predefined(dtype)) {
if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
f_count = OMPI_INT_2_FINT(count);
op->o_func[ompi_op_ddt_map[dtype->id]].fort_fn(source, target,
&f_count,
&f_dtype);
} else {
op->o_func[ompi_op_ddt_map[dtype->id]].c_fn(source, target,
&count, &dtype);
}
/* For intrinsics, we also pass the corresponding op module */
if (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC)) {
op->o_func.intrinsic.fns[ompi_op_ddt_map[dtype->id]](source, target,
&count, &dtype,
op->o_func.intrinsic.modules[ompi_op_ddt_map[dtype->id]]);
}
/* User-defined function */
@ -657,12 +490,12 @@ static inline void ompi_op_reduce(ompi_op_t * op, void *source,
else if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
f_count = OMPI_INT_2_FINT(count);
op->o_func[0].fort_fn(source, target, &f_count, &f_dtype);
op->o_func.fort_fn(source, target, &f_count, &f_dtype);
} else if (0 != (op->o_flags & OMPI_OP_FLAGS_CXX_FUNC)) {
op->o_func[0].cxx_intercept_fn(source, target, &count, &dtype,
op->o_func[1].c_fn);
op->o_func.cxx_data.intercept_fn(source, target, &count, &dtype,
op->o_func.cxx_data.user_fn);
} else {
op->o_func[0].c_fn(source, target, &count, &dtype);
op->o_func.c_fn(source, target, &count, &dtype);
}
}
@ -685,23 +518,14 @@ static inline void ompi_op_reduce(ompi_op_t * op, void *source,
* with the values in the source buffer and the result is stored in
* the target buffer).
*
* This function figures out which reduction operation function to
* invoke and whether to invoke it with C- or Fortran-style invocation
* methods. If the op is intrinsic and has the operation defined for
* dtype, the appropriate back-end function will be invoked.
* Otherwise, the op is assumed to be a user op and the first function
* pointer in the op array will be used.
* This function will *only* be invoked on intrinsic MPI_Ops.
*
* NOTE: This function assumes that a correct combination will be
* given to it; it makes no provision for errors (in the name of
* optimization). If you give it an intrinsic op with a datatype that
* is not defined to have that operation, it is likely to seg fault.
* Otherwise, this function is the same as ompi_op_reduce.
*/
static inline void ompi_3buff_op_reduce(ompi_op_t * op, void *source1,
void *source2, void *target,
int count, ompi_datatype_t * dtype)
{
MPI_Fint f_dtype, f_count;
void *restrict src1;
void *restrict src2;
void *restrict tgt;
@ -709,49 +533,10 @@ static inline void ompi_3buff_op_reduce(ompi_op_t * op, void *source1,
src2 = source2;
tgt = target;
/*
* Call the reduction function. Two dimensions: a) if both the op
* and the datatype are intrinsic, we have a series of predefined
* functions for each datatype, b) if the op has a fortran callback
* function or not.
*
* NOTE: We assume here that we will get a valid result back from
* the ompi_op_ddt_map[] (and not -1) -- if we do, then the
* parameter check in the top-level MPI function should have caught
* it. If we get -1 because the top-level parameter check is turned
* off, then it's an erroneous program and it's the user's fault.
* :-)
*/
if (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC) &&
ompi_ddt_is_predefined(dtype)) {
if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
f_count = OMPI_INT_2_FINT(count);
op->o_3buff_func[ompi_op_ddt_map[dtype->id]].fort_fn(src1,
src2, tgt,
&f_count,
&f_dtype);
} else {
op->o_3buff_func[ompi_op_ddt_map[dtype->id]].c_fn(src1, src2,
tgt, &count,
&dtype);
}
}
/* User-defined function - this can't work, will never be called.
* need to take this out soon. */
else if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
f_count = OMPI_INT_2_FINT(count);
op->o_3buff_func[0].fort_fn(src1, src2, tgt, &f_count, &f_dtype);
} else if (0 != (op->o_flags & OMPI_OP_FLAGS_CXX_FUNC)) {
op->o_3buff_func[0].cxx_intercept_fn(src1, src2, tgt, &count,
&dtype, op->o_func[1].c_fn);
} else {
op->o_3buff_func[0].c_fn(src1, src2, tgt, &count, &dtype);
}
op->o_3buff_intrinsic.fns[ompi_op_ddt_map[dtype->id]](src1, src2,
tgt, &count,
&dtype,
op->o_3buff_intrinsic.modules[ompi_op_ddt_map[dtype->id]]);
}
END_C_DECLS


@ -65,6 +65,8 @@
#include "ompi/errhandler/errcode.h"
#include "ompi/request/request.h"
#include "ompi/op/op.h"
#include "ompi/mca/op/op.h"
#include "ompi/mca/op/base/base.h"
#include "ompi/file/file.h"
#include "ompi/attribute/attribute.h"
#include "ompi/mca/allocator/base/base.h"
@ -451,16 +453,25 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}
/* initialize ops. This has to be done *after* ddt_init, but
befor mca_coll_base_open, since come collective modules
(e.g. the hierarchical) need them in the query function
*/
/* Initialize the op framework. This has to be done *after*
ddt_init, but befor mca_coll_base_open, since some collective
modules (e.g., the hierarchical coll component) may need ops in
their query function. */
if (OMPI_SUCCESS != (ret = ompi_op_base_open())) {
error = "ompi_op_base_open() failed";
goto error;
}
if (OMPI_SUCCESS !=
(ret = ompi_op_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
OMPI_ENABLE_MPI_THREADS))) {
error = "ompi_op_base_find_available() failed";
goto error;
}
if (OMPI_SUCCESS != (ret = ompi_op_init())) {
error = "ompi_op_init() failed";
goto error;
}
/* Open up MPI-related MCA components */
if (OMPI_SUCCESS != (ret = mca_allocator_base_open())) {


@ -78,6 +78,7 @@
#include "ompi/mca/osc/base/base.h"
#include "ompi/mca/pubsub/base/base.h"
#include "ompi/mca/dpm/base/base.h"
#include "ompi/mca/op/base/base.h"
#if OPAL_ENABLE_FT == 1
#include "ompi/mca/crcp/crcp.h"
@ -410,6 +411,9 @@ void ompi_info::open_components()
}
component_map["dpm"] = &ompi_dpm_base_components_available;
ompi_op_base_open();
component_map["op"] = &ompi_op_base_components_opened;
#if OPAL_ENABLE_FT == 1
if (OMPI_SUCCESS != ompi_crcp_base_open()) {
goto error;
@ -460,6 +464,7 @@ void ompi_info::close_components()
#if OPAL_ENABLE_FT == 1
(void) ompi_crcp_base_close();
#endif
(void) ompi_op_base_close();
(void) ompi_dpm_base_close();
(void) ompi_pubsub_base_close();
(void) mca_topo_base_close();
@ -472,7 +477,6 @@ void ompi_info::close_components()
(void) mca_coll_base_close();
(void) mca_allocator_base_close();
(void) ompi_osc_base_close();
(void) orte_grpcomm_base_close();
(void) orte_ess_base_close();
(void) orte_show_help_finalize();


@ -210,6 +210,7 @@ int main(int argc, char *argv[])
ompi_info::mca_types.push_back("mtl");
ompi_info::mca_types.push_back("topo");
ompi_info::mca_types.push_back("osc");
ompi_info::mca_types.push_back("op");
ompi_info::mca_types.push_back("common");
#if OPAL_ENABLE_FT == 1
ompi_info::mca_types.push_back("crcp");