diff --git a/acinclude.m4 b/acinclude.m4 index 05af1b2f35..3dbf118cec 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -66,6 +66,7 @@ m4_include(config/ompi_check_bproc.m4) m4_include(config/ompi_check_xcpu.m4) m4_include(config/ompi_check_mvapi.m4) m4_include(config/ompi_check_openib.m4) +m4_include(config/ompi_check_portals.m4) m4_include(config/ompi_check_udapl.m4) m4_include(config/ompi_check_package.m4) m4_include(config/ompi_check_slurm.m4) diff --git a/config/ompi_check_portals.m4 b/config/ompi_check_portals.m4 new file mode 100644 index 0000000000..0f21c794c6 --- /dev/null +++ b/config/ompi_check_portals.m4 @@ -0,0 +1,136 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_CHECK_PORTALS(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +# check if PORTALS support can be found. sets prefix_{CPPFLAGS, +# LDFLAGS, LIBS} as needed and runs action-if-found if there is +# support, otherwise executes action-if-not-found +AC_DEFUN([OMPI_CHECK_PORTALS],[ + # save compiler flags so that we don't alter them for later + # components. 
+ check_portals_save_CPPFLAGS="$CPPFLAGS" + check_portals_save_LDFLAGS="$LDFLAGS" + check_portals_save_LIBS="$LIBS" + + check_portals_CPPFLAGS= + check_portals_LDFLAGS= + check_portals_LIBS= + + check_portals_configuration="none" + ompi_check_portals_happy="yes" + + # Get some configuration information + AC_ARG_WITH([portals], + [AC_HELP_STRING([--with-portals(=DIR)], + [Build Portals support, searching for installation in DIR])]) + AC_ARG_WITH([portals-config], + AC_HELP_STRING([--with-portals-config], + [configuration to use for Portals support. + One of "utcp", "xt3". (default: utcp)])) + AC_ARG_WITH([portals-libs], + [AC_HELP_STRING([--with-portals-libs=LIBS], + [Libraries to link with for portals])]) + + AC_MSG_CHECKING([for Portals configuration]) + if test "$with_portals_config" = "" ; then + with_portals_config="utcp" + elif test "$with_portals_config" = "redstorm" ; then + with_portals_config="xt3" + fi + OMPI_PORTALS_UTCP=0 + OMPI_PORTALS_CRAYXT3=0 + case "$with_portals_config" in + "utcp") + OMPI_PORTALS_UTCP=1 + OMPI_PORTALS_HAVE_EVENT_UNLINK=1 + check_portals_LIBS="-lp3utcp -lp3api -lp3lib -lp3rt -lp3utcp" + check_portals_header_prefix= + AC_MSG_RESULT([utcp]) + ;; + "xt3") + OMPI_PORTALS_CRAYXT3=1 + OMPI_PORTALS_HAVE_EVENT_UNLINK=0 + check_portals_LIBS= + check_portals_header_prefix="portals/" + AC_MSG_RESULT([Cray XT3]) + ;; + *) + # ok to call ERROR here - the user specified something invalid. + # that should be brought to his attention + AC_MSG_ERROR([unknown Portals configuration. 
Can not continue]) + ;; + esac + + AC_DEFINE_UNQUOTED([OMPI_PORTALS_HAVE_EVENT_UNLINK], + [$OMPI_PORTALS_HAVE_EVENT_UNLINK], + [Does Portals send a PTL_EVENT_UNLINK event]) + + AC_DEFINE_UNQUOTED([OMPI_PORTALS_UTCP], [$OMPI_PORTALS_UTCP], + [Use the UTCP reference implementation of Portals]) + AC_DEFINE_UNQUOTED([OMPI_PORTALS_CRAYXT3], [$OMPI_PORTALS_CRAYXT3], + [Use the Cray XT-3 implementation of Portals]) + + # Add correct -I and -L flags + AS_IF([test -n "$with_portals"], + [AS_IF([test -d "$with_portals/include"], + [check_portals_CPPFLAGS="-I$with_portals/include" + CPPFLAGS="$CPPFLAGS $check_portals_CPPFLAGS"], []) + AS_IF([test -d "$with_portals/lib"], + [check_portals_LDFLAGS="-L$with_portals/lib" + LDFLAGS="$LDFLAGS $check_portals_LDFLAGS"], [])]) + + # Try to find all the portals libraries (this is not fun!) + if test -n "$with_portals_libs" ; then + check_portals_LIBS="" + for lib in $with_portals_libs ; do + check_portals_LIBS="$check_portals_LIBS -l$lib" + done + fi + + # check for portals - link against the libraries selected above (check_portals_LIBS) so the link test is meaningful for any caller prefix + LIBS="$LIBS $check_portals_LIBS" + AC_CHECK_HEADERS([${check_portals_header_prefix}portals3.h], + [AC_MSG_CHECKING([if possible to link Portals application]) + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <${check_portals_header_prefix}portals3.h>], + [int i; PtlInit(&i);])], + [AC_MSG_RESULT([yes]) + $1_WRAPPER_EXTRA_LDFLAGS="$check_portals_LDFLAGS" + $1_WRAPPER_EXTRA_LIBS="$check_portals_LIBS" + ompi_check_portals_happy="yes"], + [AC_MSG_RESULT([no]) + ompi_check_portals_happy="no"])], + [ompi_check_portals_happy="no"]) + + # reset the flags for the next test + CPPFLAGS="$check_portals_save_CPPFLAGS" + LDFLAGS="$check_portals_save_LDFLAGS" + LIBS="$check_portals_save_LIBS" + + $1_CPPFLAGS="$check_portals_CPPFLAGS" + $1_LDFLAGS="$check_portals_LDFLAGS" + $1_LIBS="$check_portals_LIBS" + + AS_IF([test "$ompi_check_portals_happy" = "yes"], + [$2], + [AS_IF([test ! -z "$with_portals" -a "$with_portals" != "no"], + [AC_MSG_ERROR([Portals support requested but not found. 
Aborting])]) + $3]) +]) + diff --git a/config/ompi_mca.m4 b/config/ompi_mca.m4 index ff2caac8c0..663ba5ae03 100644 --- a/config/ompi_mca.m4 +++ b/config/ompi_mca.m4 @@ -214,6 +214,7 @@ AC_DEFUN([OMPI_MCA],[ # BWB - fix me... need to automate this somehow MCA_SETUP_DIRECT_CALL(pml, ompi) + MCA_SETUP_DIRECT_CALL(mtl, ompi) # make all the config output statements for the no configure # components diff --git a/ompi/mca/bml/base/base.h b/ompi/mca/bml/base/base.h index cc0c157ea1..847419815c 100644 --- a/ompi/mca/bml/base/base.h +++ b/ompi/mca/bml/base/base.h @@ -50,8 +50,7 @@ OBJ_CLASS_DECLARATION(mca_bml_base_selected_module_t); OMPI_DECLSPEC int mca_bml_base_open(void); OMPI_DECLSPEC int mca_bml_base_init(bool enable_progress_threads, - bool enable_mpi_threads, - opal_class_t* endpoint_class); + bool enable_mpi_threads); OMPI_DECLSPEC int mca_bml_base_close(void); diff --git a/ompi/mca/bml/base/bml_base_endpoint.c b/ompi/mca/bml/base/bml_base_endpoint.c index 52fb5deab2..7dbf456541 100644 --- a/ompi/mca/bml/base/bml_base_endpoint.c +++ b/ompi/mca/bml/base/bml_base_endpoint.c @@ -27,7 +27,6 @@ static void mca_bml_base_endpoint_construct(mca_bml_base_endpoint_t* ep) { - ep->copy = NULL; ep->btl_rdma_offset = 0; ep->btl_max_send_size = 0; ep->btl_rdma_size = 0; diff --git a/ompi/mca/bml/base/bml_base_init.c b/ompi/mca/bml/base/bml_base_init.c index ca01093bb8..d199dc2823 100644 --- a/ompi/mca/bml/base/bml_base_init.c +++ b/ompi/mca/bml/base/bml_base_init.c @@ -39,8 +39,7 @@ mca_bml_base_component_t mca_bml_component; int mca_bml_base_init( bool enable_progress_threads, - bool enable_mpi_threads, - opal_class_t* endpoint_class) { + bool enable_mpi_threads) { opal_list_item_t *item = NULL; mca_bml_base_component_t *component = NULL, *best_component = NULL; mca_bml_base_module_t *module = NULL, *best_module = NULL; @@ -60,8 +59,7 @@ int mca_bml_base_init( bool enable_progress_threads, } module = component->bml_init(&priority, enable_progress_threads, - enable_mpi_threads, - 
endpoint_class); + enable_mpi_threads); if(NULL == module) { continue; diff --git a/ompi/mca/bml/bml.h b/ompi/mca/bml/bml.h index 6fb5e647ec..d1330b0c6b 100644 --- a/ompi/mca/bml/bml.h +++ b/ompi/mca/bml/bml.h @@ -230,20 +230,13 @@ static inline mca_bml_base_btl_t* mca_bml_base_btl_array_find( return NULL; } -/** - * Hook to copy derived class info. - */ - -typedef void (*mca_bml_base_endpoint_copy_fn_t)( - struct mca_bml_base_endpoint_t* dst, - struct mca_bml_base_endpoint_t* src); - /** * Structure associated w/ ompi_proc_t that contains the set * of BTLs used to reach a destination */ struct mca_bml_base_endpoint_t { - mca_pml_proc_t super; + opal_list_item_t super; /**< base_endpoint is a list item */ + struct ompi_proc_t* btl_proc; /**< backpointer to target ompi_proc_t */ size_t btl_rdma_offset; /**< max of min rdma size for available rmda btls */ size_t btl_max_send_size; /**< min of max send size for available send btls */ size_t btl_rdma_size; /**< max of min rdma size for available rmda btls */ @@ -251,7 +244,6 @@ struct mca_bml_base_endpoint_t { mca_bml_base_btl_array_t btl_eager; /**< array of btls to use for first fragments */ mca_bml_base_btl_array_t btl_send; /**< array of btls to use for remaining fragments */ mca_bml_base_btl_array_t btl_rdma; /**< array of btls that support (prefer) rdma */ - mca_bml_base_endpoint_copy_fn_t copy; uint32_t btl_flags_or; /**< the bitwise OR of the btl flags */ uint32_t btl_flags_and; /**< the bitwise AND of the btl flags */ }; @@ -434,8 +426,7 @@ static inline void mca_bml_base_prepare_dst(mca_bml_base_btl_t* bml_btl, typedef struct mca_bml_base_module_t* (*mca_bml_base_component_init_fn_t)( int* priority, bool enable_progress_threads, - bool enable_mpi_threads, - opal_class_t* endpoint_class + bool enable_mpi_threads ); /** diff --git a/ompi/mca/bml/r2/bml_r2.c b/ompi/mca/bml/r2/bml_r2.c index d08b0c7458..542ddf81d5 100644 --- a/ompi/mca/bml/r2/bml_r2.c +++ b/ompi/mca/bml/r2/bml_r2.c @@ -195,9 +195,9 @@ int 
mca_bml_r2_add_procs( proc = procs[p_index]; OBJ_RETAIN(proc); - if(NULL != proc->proc_pml) { + if(NULL != proc->proc_bml) { bml_endpoints[p_index] = - (mca_bml_base_endpoint_t*) proc->proc_pml; + (mca_bml_base_endpoint_t*) proc->proc_bml; } else { new_procs[n_new_procs++] = proc; } @@ -239,7 +239,7 @@ int mca_bml_r2_add_procs( for(p=0; pproc_pml; + mca_bml_base_endpoint_t * bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; mca_bml_base_btl_t* bml_btl; size_t size; @@ -249,8 +249,7 @@ int mca_bml_r2_add_procs( /* allocate bml specific proc data */ - bml_endpoint = (mca_bml_base_endpoint_t*) - opal_obj_new(mca_bml_r2.endpoint_class); + bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t); if (NULL == bml_endpoint) { opal_output(0, "mca_bml_r2_add_procs: unable to allocate resources"); free(btl_endpoints); @@ -263,14 +262,14 @@ int mca_bml_r2_add_procs( mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules); bml_endpoint->btl_max_send_size = -1; bml_endpoint->btl_rdma_size = -1; - bml_endpoint->super.proc_ompi = proc; - proc->proc_pml = (struct mca_pml_proc_t*) bml_endpoint; + bml_endpoint->btl_proc = proc; + proc->proc_bml = bml_endpoint; bml_endpoint->btl_flags_and = 0; bml_endpoint->btl_flags_or = 0; } - bml_endpoints[p] =(mca_bml_base_endpoint_t*) proc->proc_pml; + bml_endpoints[p] =(mca_bml_base_endpoint_t*) proc->proc_bml; /* dont allow an additional BTL with a lower exclusivity ranking */ @@ -354,7 +353,7 @@ int mca_bml_r2_add_procs( /* iterate back through procs and compute metrics for registered r2s */ for(p=0; pproc_pml; + mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; double total_bandwidth = 0; uint32_t latency = 0xffffffff; size_t n_index; @@ -457,7 +456,7 @@ int mca_bml_r2_del_procs(size_t nprocs, for(p = 0; p < n_del_procs; p++) { ompi_proc_t *proc = del_procs[p]; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml; + mca_bml_base_endpoint_t* 
bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; size_t f_index, f_size; size_t n_index, n_size; @@ -590,7 +589,7 @@ int mca_bml_r2_del_btl(mca_btl_base_module_t* btl) int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl) { - mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->proc_pml; + mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->proc_bml; double total_bandwidth = 0; size_t b; diff --git a/ompi/mca/bml/r2/bml_r2.h b/ompi/mca/bml/r2/bml_r2.h index 2155472789..55e4f18b90 100644 --- a/ompi/mca/bml/r2/bml_r2.h +++ b/ompi/mca/bml/r2/bml_r2.h @@ -53,7 +53,6 @@ struct mca_bml_r2_module_t { mca_btl_base_component_progress_fn_t * btl_progress; mca_bml_r2_recv_reg_t r2_reg[256]; bool btls_added; - opal_class_t * endpoint_class; }; typedef struct mca_bml_r2_module_t mca_bml_r2_module_t; @@ -66,8 +65,7 @@ extern int mca_bml_r2_component_close(void); extern mca_bml_base_module_t* mca_bml_r2_component_init( int* priority, bool enable_progress_threads, - bool enable_mpi_threads, - opal_class_t* endpoint_class + bool enable_mpi_threads ); extern int mca_bml_r2_progress(void); diff --git a/ompi/mca/bml/r2/bml_r2_component.c b/ompi/mca/bml/r2/bml_r2_component.c index 3a6c2f84d4..db7ec420b9 100644 --- a/ompi/mca/bml/r2/bml_r2_component.c +++ b/ompi/mca/bml/r2/bml_r2_component.c @@ -83,8 +83,7 @@ int mca_bml_r2_component_close(void) mca_bml_base_module_t* mca_bml_r2_component_init( int* priority, bool enable_progress_threads, - bool enable_mpi_threads, - opal_class_t* endpoint_class + bool enable_mpi_threads ) { /* initialize BTLs */ @@ -94,6 +93,5 @@ mca_bml_base_module_t* mca_bml_r2_component_init( *priority = 100; mca_bml_r2.btls_added = false; - mca_bml_r2.endpoint_class = endpoint_class; return &mca_bml_r2.super; } diff --git a/ompi/mca/btl/portals/Makefile.am b/ompi/mca/btl/portals/Makefile.am index 57a38455d5..4412473575 100644 --- a/ompi/mca/btl/portals/Makefile.am +++ b/ompi/mca/btl/portals/Makefile.am @@ -32,13 
+32,8 @@ component_noinst = libmca_btl_portals.la component_install = endif -EXTRA_DIST = \ - btl_portals_compat_utcp.c \ - btl_portals_compat_redstorm.c - portals_SOURCES = \ btl_portals.h \ - btl_portals_compat.h \ btl_portals_endpoint.h \ btl_portals_frag.h \ btl_portals_send.h \ @@ -50,9 +45,6 @@ portals_SOURCES = \ btl_portals_recv.c \ btl_portals_rdma.c -portals_nodist_SOURCES = \ - btl_portals_compat.c - AM_CPPFLAGS = $(btl_portals_CPPFLAGS) mcacomponentdir = $(libdir)/openmpi @@ -61,6 +53,7 @@ mca_btl_portals_la_SOURCES = $(portals_SOURCES) nodist_mca_btl_portals_la_SOURCES = $(portals_nodist_SOURCES) mca_btl_portals_la_LIBADD = \ $(btl_portals_LIBS) \ + $(top_ompi_builddir)/ompi/mca/common/portals/libmca_common_portals.la \ $(top_ompi_builddir)/ompi/libmpi.la \ $(top_ompi_builddir)/orte/liborte.la \ $(top_ompi_builddir)/opal/libopal.la @@ -71,6 +64,3 @@ libmca_btl_portals_la_SOURCES = $(portals_SOURCES) nodist_libmca_btl_portals_la_SOURCES = $(portals_nodist_SOURCES) libmca_btl_portals_la_LIBADD = $(btl_portals_LIBS) libmca_btl_portals_la_LDFLAGS = -module -avoid-version $(btl_portals_LDFLAGS) - -dist-clean-local: - rm -f btl_portals_compat.c diff --git a/ompi/mca/btl/portals/btl_portals.c b/ompi/mca/btl/portals/btl_portals.c index fd6f80c23c..3398d56ccc 100644 --- a/ompi/mca/btl/portals/btl_portals.c +++ b/ompi/mca/btl/portals/btl_portals.c @@ -31,7 +31,6 @@ #include "ompi/datatype/datatype.h" #include "btl_portals.h" -#include "btl_portals_compat.h" #include "btl_portals_endpoint.h" #include "btl_portals_recv.h" #include "btl_portals_frag.h" @@ -89,13 +88,14 @@ mca_btl_portals_add_procs(struct mca_btl_base_module_t* btl_base, "Adding %d procs (%d)", nprocs, mca_btl_portals_module.portals_num_procs); - /* make sure our environment is fully initialized. 
At end of this - call, we have a working network handle on our module and - portals_procs will have the portals process identifier for each - proc (ordered, in theory) */ - ret = mca_btl_portals_add_procs_compat(&mca_btl_portals_module, - nprocs, procs, - &portals_procs); + /* if we haven't already, get our network handle */ + if (mca_btl_portals_module.portals_ni_h == PTL_INVALID_HANDLE) { + ret = ompi_common_portals_ni_initialize(&mca_btl_portals_module.portals_ni_h); + if (OMPI_SUCCESS != ret) return ret; + } + + portals_procs = malloc(nprocs * sizeof(ptl_process_id_t)); /* bail out on allocation failure, as the old compat code did */ if (nprocs > 0 && NULL == portals_procs) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + ret = ompi_common_portals_get_procs(nprocs, procs, portals_procs); if (OMPI_SUCCESS != ret) return ret; if (0 == mca_btl_portals_module.portals_num_procs) { @@ -538,14 +538,8 @@ mca_btl_portals_finalize(struct mca_btl_base_module_t *btl_base) OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_max); OBJ_DESTRUCT(&mca_btl_portals_module.portals_frag_user); - if (PTL_INVALID_HANDLE != mca_btl_portals_module.portals_ni_h) { - ret = PtlNIFini(mca_btl_portals_module.portals_ni_h); - if (PTL_OK != ret) { - opal_output_verbose(20, mca_btl_portals_component.portals_output, - "PtlNIFini returned %d", ret); - return OMPI_ERROR; - } - } + ompi_common_portals_ni_finalize(); + ompi_common_portals_finalize(); opal_output_verbose(20, mca_btl_portals_component.portals_output, "successfully finalized module"); diff --git a/ompi/mca/btl/portals/btl_portals.h b/ompi/mca/btl/portals/btl_portals.h index 90d7dbb4ea..c71e2d6f56 100644 --- a/ompi/mca/btl/portals/btl_portals.h +++ b/ompi/mca/btl/portals/btl_portals.h @@ -21,7 +21,7 @@ #ifndef OMPI_BTL_PORTALS_H #define OMPI_BTL_PORTALS_H -#include "btl_portals_compat.h" +#include "ompi/mca/common/portals/common_portals.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" @@ -33,9 +33,6 @@ #include "btl_portals_endpoint.h" #include "btl_portals_frag.h" -#define OMPI_BTL_PORTALS_SEND_TABLE_ID (OMPI_BTL_PORTALS_STARTING_TABLE_ID + 0) -#define 
OMPI_BTL_PORTALS_RDMA_TABLE_ID (OMPI_BTL_PORTALS_STARTING_TABLE_ID + 1) - /* * Portals BTL component. */ @@ -55,12 +52,6 @@ struct mca_btl_portals_component_t { */ int portals_output; -#if OMPI_BTL_PORTALS_UTCP - /* ethernet interface to use - only has meaning with utcp - reference */ - char *portals_ifname; -#endif - /* initial size of free lists */ int portals_free_list_init_num; /* max size of free lists */ diff --git a/ompi/mca/btl/portals/btl_portals_compat_redstorm.c b/ompi/mca/btl/portals/btl_portals_compat_redstorm.c deleted file mode 100644 index 26db74e45c..0000000000 --- a/ompi/mca/btl/portals/btl_portals_compat_redstorm.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/constants.h" -#include "opal/util/output.h" - -#include "btl_portals.h" -#include "btl_portals_compat.h" - -#include - -int -mca_btl_portals_init_compat(mca_btl_portals_component_t *comp) -{ - int ret, max_interfaces; - uint32_t i; - - /* - * Initialize Portals interface - */ - ret = PtlInit(&max_interfaces); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "PtlInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - - /* - * Initialize a network device - */ - ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */ - PTL_PID_ANY, /* let library assign our pid */ - NULL, /* no desired limits */ - NULL, /* actual limits */ - &(mca_btl_portals_module.portals_ni_h) /* our interface handle */ - ); - if (PTL_OK != ret && PTL_IFACE_DUP != ret) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "PtlNIInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - - return OMPI_SUCCESS; -} - - -int -mca_btl_portals_add_procs_compat(struct mca_btl_portals_module_t* btl, - size_t nprocs, struct ompi_proc_t **procs, - ptl_process_id_t **portals_procs) -{ - int nptl_procs = 0; - cnos_nidpid_map_t *map; - int i; - - /* - * FIXME - XXX - FIXME - * BWB - implicit assumption that cnos procs list will match our - * procs list. Don't know what to do about that... 
- */ - - nptl_procs = cnos_get_nidpid_map(&map); - if (nptl_procs <= 0) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "cnos_get_nidpid_map() returned %d", nptl_procs); - return OMPI_ERR_FATAL; - } else if (nptl_procs != nprocs) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "nptl_procs != nprocs (%d, %d)", nptl_procs, - nprocs); - return OMPI_ERR_FATAL; - } else { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "nptl_procs: %d", nptl_procs); - } - - /* get space for the portals procs list */ - *portals_procs = calloc(nprocs, sizeof(ptl_process_id_t)); - if (NULL == *portals_procs) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "calloc(nprocs, sizeof(ptl_process_id_t)) failed"); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - for (i = 0 ; i < nprocs ; ++i) { - opal_output_verbose(120, mca_btl_portals_component.portals_output, - "rank %d: nid %ld, pid %ld", i, - map[i].nid, map[i].pid); - - /* update my local array of proc structs */ - (*portals_procs)[i].nid = map[i].nid; - (*portals_procs)[i].pid = map[i].pid; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/btl/portals/btl_portals_compat_utcp.c b/ompi/mca/btl/portals/btl_portals_compat_utcp.c deleted file mode 100644 index ec3f6341b1..0000000000 --- a/ompi/mca/btl/portals/btl_portals_compat_utcp.c +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" - -#include -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "opal/util/output.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/pml/base/pml_base_module_exchange.h" - -#include "btl_portals.h" -#include "btl_portals_compat.h" - -#include - -/* how's this for source code diving? - find private method for - getting interface */ -extern int p3tcp_my_nid(const char *if_str, unsigned int *nid); - -static bool use_modex = true; - -int -mca_btl_portals_init_compat(mca_btl_portals_component_t *comp) -{ - ptl_process_id_t info; - int ret, max_interfaces; - - /* if the environment variables for the utcp implementation are - already set, assume the user is running without the full Open - RTE and is doing RTE testing for a more tightly-coupled - platform (like, say, Red Storm). Otherwise, be nice and use - the modex to setup everything for the user */ - if (NULL == getenv("PTL_MY_RID")) { - use_modex = true; - } else { - use_modex = false; - } - - if (use_modex) { - unsigned int nid; - - p3tcp_my_nid(mca_btl_portals_component.portals_ifname, &nid); - - /* post our contact info in the registry */ - info.nid = htonl(nid); - info.pid = htonl((ptl_pid_t) getpid()); - opal_output_verbose(100, mca_btl_portals_component.portals_output, - "contact info: %u, %u", ntohl(info.nid), - ntohl(info.pid)); - - ret = mca_pml_base_modex_send(&mca_btl_portals_component.super.btl_version, - &info, sizeof(ptl_process_id_t)); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "mca_pml_base_modex_send failed: %d", ret); - return ret; - } - } else { - /* - * Initialize Portals interface - */ - ret = PtlInit(&max_interfaces); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "PtlInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - - /* tell the UTCP runtime 
code to read the env variables */ - PtlSetRank(PTL_INVALID_HANDLE, -1, -1); - - /* - * Initialize a network device - */ - ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */ - PTL_PID_ANY, /* let library assign our pid */ - NULL, /* no desired limits */ - NULL, /* no need to have limits around */ - &mca_btl_portals_module.portals_ni_h /* our interface handle */ - ); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "PtlNIInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - } - - return OMPI_SUCCESS; -} - - -int -mca_btl_portals_add_procs_compat(struct mca_btl_portals_module_t* btl, - size_t nprocs, struct ompi_proc_t **procs, - ptl_process_id_t **portals_procs) -{ - int ret; - - if (use_modex) { - int my_rid = 0; - ptl_process_id_t *info; - char *nidmap = NULL; - char *pidmap = NULL; - char *nid_str; - char *pid_str; - const size_t map_size = nprocs * 12 + 1; /* 12 is max length of long in decimal */ - size_t size, i; - char *tmp; - ompi_proc_t* proc_self = ompi_proc_local(); - int max_interfaces; - - /* - * Do all the NID/PID map setup - */ - /* each nid is a int, so need 10 there, plus the : */ - nidmap = malloc(map_size); - pidmap = malloc(map_size); - nid_str = malloc(12 + 1); - pid_str = malloc(12 + 1); - if (NULL == nidmap || NULL == pidmap || - NULL == nid_str || NULL == pid_str) - return OMPI_ERROR; - - /* get space for the portals procs list */ - *portals_procs = calloc(nprocs, sizeof(ptl_process_id_t)); - if (NULL == *portals_procs) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "calloc(nprocs, sizeof(ptl_process_id_t)) failed"); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - for (i = 0 ; i < nprocs ; ++i) { - if (proc_self == procs[i]) my_rid = i; - - ret = mca_pml_base_modex_recv(&mca_btl_portals_component.super.btl_version, - procs[i], (void**) &info, &size); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - 
"mca_pml_base_modex_recv failed: %d", ret); - return ret; - } else if (sizeof(ptl_process_id_t) != size) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "mca_pml_base_modex_recv returned size %d, expected %d", - size, sizeof(ptl_process_id_t)); - return OMPI_ERROR; - } - - if (i == 0) { - snprintf(nidmap, map_size, "%u", ntohl(info->nid)); - snprintf(pidmap, map_size, "%u", ntohl(info->pid)); - } else { - snprintf(nid_str, 12 + 1, ":%u", ntohl(info->nid)); - snprintf(pid_str, 12 + 1, ":%u", ntohl(info->pid)); - strncat(nidmap, nid_str, 12); - strncat(pidmap, pid_str, 12); - } - - /* update my local array of proc structs */ - (*portals_procs)[i].nid = ntohl(info->nid); - (*portals_procs)[i].pid = ntohl(info->pid); - - free(info); - } - - opal_output_verbose(100, mca_btl_portals_component.portals_output, - "my rid: %u", my_rid); - opal_output_verbose(100, mca_btl_portals_component.portals_output, - "nid map: %s", nidmap); - opal_output_verbose(100, mca_btl_portals_component.portals_output, - "pid map: %s", pidmap); - opal_output_verbose(100, mca_btl_portals_component.portals_output, - "iface: %s", - mca_btl_portals_component.portals_ifname); - - asprintf(&tmp, "PTL_MY_RID=%u", my_rid); - putenv(tmp); - asprintf(&tmp, "PTL_NIDMAP=%s", nidmap); - putenv(tmp); - asprintf(&tmp, "PTL_PIDMAP=%s", pidmap); - putenv(tmp); - asprintf(&tmp, "PTL_IFACE=%s", mca_btl_portals_component.portals_ifname); - putenv(tmp); - - free(pidmap); - free(nidmap); - free(pid_str); - free(nid_str); - - /* - * Initialize Portals - */ - ret = PtlInit(&max_interfaces); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "PtlInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - - /* tell the UTCP runtime code to read the env variables */ - PtlSetRank(PTL_INVALID_HANDLE, -1, -1); - - ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */ - PTL_PID_ANY, /* let library assign our pid */ - NULL, /* no desired limits 
*/ - NULL, /* save our limits somewhere */ - &(mca_btl_portals_module.portals_ni_h) /* our interface handle */ - ); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "PtlNIInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - } else { /* use_modex */ - unsigned int nptl_procs, rank, i; - - /* - */ - ret = PtlGetRank(mca_btl_portals_module.portals_ni_h, &rank, &nptl_procs); - if (ret != PTL_OK) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "PtlGetRank() returned %d", ret); - return OMPI_ERR_FATAL; - } else if (nptl_procs != nprocs) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "nptl_procs != nprocs (%d, %d)", nptl_procs, - nprocs); - return OMPI_ERR_FATAL; - } - - /* create enough space for all the proc info structs */ - *portals_procs = calloc(nprocs, sizeof(ptl_process_id_t)); - if (NULL == *portals_procs) { - opal_output_verbose(10, mca_btl_portals_component.portals_output, - "calloc(nprocs, sizeof(ptl_process_id_t)) failed"); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - /* fill in all the proc info structs */ - for (i = 0 ; i < nprocs ; ++i) { - ret = PtlGetRankId(mca_btl_portals_module.portals_ni_h, - i, &((*portals_procs)[i])); - if (PTL_OK != ret) { - opal_output_verbose(10, - mca_btl_portals_component.portals_output, - "PtlGetRankId(%d) failed: %d\n", i, ret); - return OMPI_ERR_FATAL; - } - } - } - -#if 0 - PtlNIDebug(mca_btl_portals_module.portals_ni_h, PTL_DBG_ALL); -#endif - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/btl/portals/btl_portals_component.c b/ompi/mca/btl/portals/btl_portals_component.c index acf4b29a66..a473a80f10 100644 --- a/ompi/mca/btl/portals/btl_portals_component.c +++ b/ompi/mca/btl/portals/btl_portals_component.c @@ -21,18 +21,15 @@ #include #include #include -#if OMPI_BTL_PORTALS_REDSTORM -#include -#endif #include "ompi/constants.h" #include "opal/util/output.h" #include "opal/threads/threads.h" #include 
"opal/mca/base/mca_base_param.h" +#include "ompi/mca/common/portals/common_portals.h" #include "btl_portals.h" -#include "btl_portals_compat.h" #include "btl_portals_frag.h" #include "btl_portals_send.h" #include "btl_portals_recv.h" @@ -78,6 +75,8 @@ mca_btl_portals_component_open(void) int i; int dummy; + ompi_common_portals_register_mca(); + /* * get configured state for component */ @@ -94,26 +93,11 @@ mca_btl_portals_component_open(void) false, 0, &(portals_output_stream.lds_verbose_level)); -#if OMPI_BTL_PORTALS_REDSTORM asprintf(&(portals_output_stream.lds_prefix), - "btl: portals (%5d): ", cnos_get_rank()); -#else - asprintf(&(portals_output_stream.lds_prefix), - "btl: portals (%5d): ", getpid()); -#endif + "btl: portals (%s): ", ompi_common_portals_nodeid()); mca_btl_portals_component.portals_output = opal_output_open(&portals_output_stream); -#if OMPI_BTL_PORTALS_UTCP - mca_base_param_reg_string(&mca_btl_portals_component.super.btl_version, - "ifname", - "Interface name to use for communication", - false, - false, - "eth0", - &(mca_btl_portals_component.portals_ifname)); -#endif - mca_base_param_reg_int(&mca_btl_portals_component.super.btl_version, "free_list_init_num", "Initial number of elements to initialize in free lists", @@ -277,12 +261,6 @@ int mca_btl_portals_component_close(void) { /* release resources */ -#if OMPI_BTL_PORTALS_UTCP - if (NULL != mca_btl_portals_component.portals_ifname) { - free(mca_btl_portals_component.portals_ifname); - } -#endif - if (NULL != portals_output_stream.lds_prefix) { free(portals_output_stream.lds_prefix); } @@ -311,14 +289,12 @@ mca_btl_portals_component_init(int *num_btls, /* initialize portals btl. 
note that this is in the compat code because it's fairly non-portable between implementations */ - if (OMPI_SUCCESS != mca_btl_portals_init_compat(&mca_btl_portals_component)) { + if (OMPI_SUCCESS != ompi_common_portals_initialize()) { opal_output_verbose(20, mca_btl_portals_component.portals_output, "disabled because compatibility init failed"); return NULL; } - /* fill in all the portable parts of the module structs - the - compat code filled in the other bits already */ OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_frag_eager), ompi_free_list_t); OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_frag_max), ompi_free_list_t); OBJ_CONSTRUCT(&(mca_btl_portals_module.portals_frag_user), ompi_free_list_t); diff --git a/ompi/mca/btl/portals/configure.m4 b/ompi/mca/btl/portals/configure.m4 index 6b4c6b40f2..c3176e7051 100644 --- a/ompi/mca/btl/portals/configure.m4 +++ b/ompi/mca/btl/portals/configure.m4 @@ -17,126 +17,23 @@ # $HEADER$ # -# _MCA_btl_portals_CONFIG_PLATFORM() -# ---------------------------------- -AC_DEFUN([MCA_btl_portals_CONFIG_PLATFORM], [ - # Configure Portals for our local environment - BTL_PORTALS_UTCP=0 - BTL_PORTALS_REDSTORM=0 - BTL_PORTALS_COMPAT="" - BTL_PORTALS_HAVE_EVENT_UNLINK=0 - btl_portals_compat="none" - btl_portals_header_prefix= - btl_portals_starting_table_id=0 - AC_ARG_WITH([portals-config], - AC_HELP_STRING([--with-portals-config], - [configuration to use for Portals support. - One of "utcp", "redstorm". 
(default: utcp)])) - AC_MSG_CHECKING([for Portals configuration]) - if test "$with_portals_config" = "" ; then - with_portals_config="utcp" - fi - case "$with_portals_config" in - "utcp") - BTL_PORTALS_UTCP=1 - BTL_PORTALS_HAVE_EVENT_UNLINK=1 - btl_portals_LIBS="-lp3utcp -lp3api -lp3lib -lp3rt -lp3utcp" - btl_portals_compat="utcp" - btl_portals_header_prefix= - btl_portals_starting_table_id=0 - AC_MSG_RESULT([utcp]) - ;; - "redstorm") - BTL_PORTALS_REDSTORM=1 - BTL_PORTALS_HAVE_EVENT_UNLINK=0 - btl_portals_LIBS= - btl_portals_compat="redstorm" - btl_portals_header_prefix="portals/" - btl_portals_starting_table_id=30 - AC_MSG_RESULT([red storm]) - ;; - *) - # ok to call ERROR here - the user specified something invalid. - # that should be brought to his attention - AC_MSG_ERROR([unknown Portals configuration. Can not continue]) - ;; - esac - - # Try to find all the portals libraries (this is not fun!) - AC_ARG_WITH([portals-libs], - [AC_HELP_STRING([--with-portals-libs=LIBS], - [Libraries to link with for portals])]) - if test -n "$with_portals_libs" ; then - btl_portals_LIBS="" - for lib in $with_portals_libs ; do - btl_portals_LIBS="$btl_portals_LIBS -l$lib" - done - fi - - AC_DEFINE_UNQUOTED([OMPI_BTL_PORTALS_HAVE_EVENT_UNLINK], - [$BTL_PORTALS_HAVE_EVENT_UNLINK], - [Does Portals send a BTL_EVENT_UNLINK event]) - - AC_DEFINE_UNQUOTED([OMPI_BTL_PORTALS_UTCP], [$BTL_PORTALS_UTCP], - [Use the UTCP reference implementation or Portals]) - AC_DEFINE_UNQUOTED([OMPI_BTL_PORTALS_REDSTORM], [$BTL_PORTALS_REDSTORM], - [Use the Red Storm implementation or Portals]) - - AC_DEFINE_UNQUOTED([OMPI_BTL_PORTALS_STARTING_TABLE_ID], - [$btl_portals_starting_table_id], - [first table id to use for portals btl]) - - AC_CONFIG_LINKS([ompi/mca/btl/portals/btl_portals_compat.c:ompi/mca/btl/portals/btl_portals_compat_${btl_portals_compat}.c]) -]) - # MCA_btl_portals_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ 
AC_DEFUN([MCA_btl_portals_CONFIG],[ - # save compiler flags so that we don't alter them for later - # components. - btl_portals_save_CPPFLAGS="$CPPFLAGS" - btl_portals_save_LDFLAGS="$LDFLAGS" - btl_portals_save_LIBS="$LIBS" + OMPI_CHECK_PORTALS([btl_portals], + [btl_portals_happy="yes"], + [btl_portals_happy="no"]) - # allow user a way to say where the Portals installation is - AC_ARG_WITH(portals, - AC_HELP_STRING([--with-portals=DIR], - [Specify the installation directory of PORTALS])) + AS_IF([test "$btl_portals_happy" = "yes"], + [btl_portals_WRAPPER_EXTRA_LDFLAGS="$btl_portals_LDFLAGS" + btl_portals_WRAPPER_EXTRA_LIBS="$btl_portals_LIBS" + $1], + [$2]) - AS_IF([test -n "$with_portals"], - [AS_IF([test -d "$with_portals/include"], - [btl_portals_CPPFLAGS="-I$with_portals/include" - CPPFLAGS="$CPPFLAGS $btl_portals_CPPFLAGS"], []) - AS_IF([test -d "$with_portals/lib"], - [btl_portals_LDFLAGS="-L$with_portals/lib" - LDFLAGS="$LDFLAGS $btl_portals_LDFLAGS"], [])]) - - # try to get our platform configuration - MCA_btl_portals_CONFIG_PLATFORM() - - # check for portals - LIBS="$LIBS $btl_portals_LIBS" - AC_CHECK_HEADERS([${btl_portals_header_prefix}portals3.h], - [AC_MSG_CHECKING([if possible to link Portals application]) - AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <${btl_portals_header_prefix}portals3.h>], - [int i; PtlInit(&i);])], - [AC_MSG_RESULT([yes]) - btl_portals_WRAPPER_EXTRA_LDFLAGS="$btl_portals_LDFLAGS" - btl_portals_WRAPPER_EXTRA_LIBS="$btl_portals_LIBS" - $1], - [AC_MSG_RESULT([no]) - $2])], - [$2]) - - # substitute in the things needed to build Portals + # substitute in the things needed to build portals AC_SUBST([btl_portals_CPPFLAGS]) AC_SUBST([btl_portals_LDFLAGS]) AC_SUBST([btl_portals_LIBS]) - - # reset the flags for the next test - CPPFLAGS="$btl_portals_save_CPPFLAGS" - LDFLAGS="$btl_portals_save_LDFLAGS" - LIBS="$btl_portals_save_LIBS" ])dnl diff --git a/ompi/mca/common/portals/Makefile.am b/ompi/mca/common/portals/Makefile.am new file mode 
100644 index 0000000000..832ee6f24e --- /dev/null +++ b/ompi/mca/common/portals/Makefile.am @@ -0,0 +1,111 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# A word of explanation... +# +# This library is linked against various MCA components because all +# Portals-based components (e.g., btl, mtl) need to +# share some common code and data. There are two cases: +# +# 1. libmca_common_portals.la is a shared library. By linking that shared +# library to all components that need it, the OS linker will +# automatically load it into the process as necessary, and there will +# only be one copy (i.e., all the components will share *one* copy of +# the code and data). +# 2. libmca_common_portals.la is a static library. In this case, it +# will be rolled up into the top-level libmpi.la. It will also be +# rolled into each component, but then the component will also be +# rolled up into the upper-level libmpi.la. Libtool sorts this all +# out and it all works out in the end. +# +# Note that building this common component statically and linking +# against other dynamic components is *not* supported! + + +AM_CPPFLAGS = $(common_portals_CPPFLAGS) + +# Header files + +headers = \ + common_portals.h + +# Source files + +sources = \ + common_portals.c + +EXTRA_DIST = common_portals_crayxt3.c common_portals_utcp.c + +# As per above, we'll either have an installable or noinst result. 
+# The installable one should follow the same MCA prefix naming rules +# (i.e., libmca_<type>_<name>.la). The noinst one can be named +# whatever it wants, although libmca_<type>_<name>_noinst.la is +# recommended. + +# To simplify components that link to this library, we will *always* +# have an output libtool library named libmca_<type>_<name>.la -- even +# for case 2) described above (i.e., so there's no conditional logic +# necessary in component Makefile.am's that link to this library). +# Hence, if we're creating a noinst version of this library (i.e., +# case 2), we sym link it to the libmca_<type>_<name>.la name +# (libtool will do the Right Things under the covers). See the +# all-local and clean-local rules, below, for how this is effected. + +lib_LTLIBRARIES = +noinst_LTLIBRARIES = +comp_inst = libmca_common_portals.la +comp_noinst = libmca_common_portals_noinst.la + +if OMPI_BUILD_common_portals_DSO +lib_LTLIBRARIES += $(comp_inst) +else +noinst_LTLIBRARIES += $(comp_noinst) +endif + +libmca_common_portals_la_SOURCES = $(headers) $(sources) +libmca_common_portals_la_LDFLAGS = $(common_portals_LDFLAGS) +libmca_common_portals_la_LIBADD = $(common_portals_LIBS) + +libmca_common_portals_noinst_la_SOURCES = $(libmca_common_portals_la_SOURCES) +libmca_common_portals_noinst_la_LDFLAGS = $(common_portals_LDFLAGS) +libmca_common_portals_noinst_la_LIBADD = $(common_portals_LIBS) + + +# Conditionally install the header files + +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/ompi/mca/common/portals +ompi_HEADERS = $(headers) +else +ompidir = $(includedir) +endif + +# These two rules will sym link the "noinst" libtool library filename +# to the installable libtool library filename in the case where we are +# compiling this component statically (case 2, described above). 
+ +all-local: + if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(comp_inst)"; \ + $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \ + fi + +clean-local: + if test -z "$(lib_LTLIBRARIES)"; then \ + rm -f "$(comp_inst)"; \ + fi diff --git a/ompi/mca/common/portals/common_portals.c b/ompi/mca/common/portals/common_portals.c new file mode 100644 index 0000000000..a27759bc53 --- /dev/null +++ b/ompi/mca/common/portals/common_portals.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "common_portals.h" + + +#if OMPI_PORTALS_UTCP + +#include "common_portals_utcp.c" + +#elif OMPI_PORTALS_CRAYXT3 + +#include "common_portals_crayxt3.c" + +#else + +#error "Unknown Portals library configuration" + +#endif + +int +ompi_common_portals_error_ptl_to_ompi(int ptl_error) +{ + int ret; + + switch (ptl_error) { + case PTL_OK: + ret = OMPI_SUCCESS; + break; + case PTL_AC_INDEX_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_EQ_DROPPED: + ret = OMPI_ERR_OUT_OF_RESOURCE; + break; + case PTL_EQ_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_FAIL: + ret = OMPI_ERROR; + break; + case PTL_HANDLE_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_IFACE_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_MD_ILLEGAL: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_MD_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_MD_IN_USE: + ret = OMPI_ERR_RESOURCE_BUSY; + break; + 
case PTL_ME_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_ME_IN_USE: + ret = OMPI_ERR_RESOURCE_BUSY; + break; + case PTL_ME_LIST_TOO_LONG: + ret = OMPI_ERR_OUT_OF_RESOURCE; + break; + case PTL_NI_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_NO_INIT: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_NO_SPACE: + ret = OMPI_ERR_OUT_OF_RESOURCE; + break; + case PTL_PID_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_PROCESS_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_PT_FULL: + ret = OMPI_ERR_OUT_OF_RESOURCE; + break; + case PTL_PT_INDEX_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_SEGV: + ret = OMPI_ERR_VALUE_OUT_OF_BOUNDS; + break; + case PTL_SR_INDEX_INVALID: + ret = OMPI_ERR_BAD_PARAM; + break; + case PTL_UNKNOWN_ERROR: + ret = OMPI_ERROR; + break; + default: + ret = OMPI_ERROR; + } + + return ret; +} diff --git a/ompi/mca/common/portals/common_portals.h b/ompi/mca/common/portals/common_portals.h new file mode 100644 index 0000000000..97d468ddbb --- /dev/null +++ b/ompi/mca/common/portals/common_portals.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_MCA_COMMON_PORTALS_H +#define OMPI_MCA_COMMON_PORTALS_H + +#if OMPI_PORTALS_UTCP + +#include +#include +#include +#include +#include + +#define OMPI_BTL_PORTALS_SEND_TABLE_ID 0 +#define OMPI_BTL_PORTALS_RDMA_TABLE_ID 1 + +#define OMPI_MTL_PORTALS_SEND_TABLE_ID 2 +#define OMPI_MTL_PORTALS_READ_TABLE_ID 3 + +#elif OMPI_PORTALS_CRAYXT3 + +#include +#define PTL_EQ_HANDLER_NONE NULL + +#define OMPI_BTL_PORTALS_SEND_TABLE_ID 30 +#define OMPI_BTL_PORTALS_RDMA_TABLE_ID 31 + +#define OMPI_MTL_PORTALS_SEND_TABLE_ID 32 +#define OMPI_MTL_PORTALS_READ_TABLE_ID 33 + +#else + +#error "Unknown Portals library configuration" + +#endif + +#include "ompi/proc/proc.h" + + +/** + * Simple identifier for identifying node/process + * + * Get a string representing a simple way to identify the node/rank of + * the current process. Currently returns the rank in the job on the + * XT-3 or the pid on the reference implementation. + * + * \note Caller is responsible for calling free() on the returned + * string. + */ +char* ompi_common_portals_nodeid(void); + + +/** + * Register MCA parameters for Portals code + * + * Register MCA parameters for Portals common code. This should be + * called during component open so that parameters are available to + * ompi_info and the like. This call will not initialize the Portals + * interface or cause any communication. + * + * @retval OMPI_SUCCESS + */ +int ompi_common_portals_register_mca(void); + + +/** + * Initialize compatibility code + * + * Initialize Portals compatibility code. A best effort is made to + * initialize Portals (with PtlInit() and PtlNIInit()), although this + * may not be possible if use of the modex is required to setup the + * network (as is the case with the utcp reference implementation). 
+ * + * @retval OMPI_SUCCESS Portals successfully initialized + * @retval OMPI_ERR_NOT_AVAILABLE Portals could not be initialized + */ +int ompi_common_portals_initialize(void); + + +/** + * Initialize network interface + * + * Initialize the portals network interface. The initialization may + * actually have happened in ompi_common_portals_initialize(), but + * this will return the network interface handle. This function may + * require some information shared by the modex, so should only be + * called after the modex data is available. + * + * @param ni_handle (OUT) network interface handle + * + * @retval OMPI_SUCCESS Portals network interface successfully initialized + * @retval OMPI_ERROR Something bad happened + */ +int ompi_common_portals_ni_initialize(ptl_handle_ni_t *ni_handle); + + +/** + * Get process_id_t array for proc list + * + * Get ptl_process_id_t array for proc list + * + * @param nprocs (IN) Number of procs in proc list + * @param procs (IN) List of OMPI procs + * @param portals_procs (OUT) array of ptl_process_id_t + * structures associated with OMPI procs + * + * @retval OMPI_SUCCESS All went well + * @retval OMPI_ERROR All went poorly + */ +int ompi_common_portals_get_procs(size_t nprocs, + struct ompi_proc_t **procs, + ptl_process_id_t *portals_procs); + + +/** + * Shut down Portals network interface + * + * Shut down Portals network device, including calling PtlNIFini() + * if appropriate. The common code will reference count so that it is + * safe for each component that calls + * ompi_common_portals_ni_initialize() to call + * ompi_common_portals_ni_finalize() + */ +int ompi_common_portals_ni_finalize(void); + + +/** + * Shut down Portals + * + * Shut down Portals, including calling PtlFini() if appropriate. 
The + * common code will reference count so that it is safe for each + * component that calls ompi_common_portals_initialize() to call + * ompi_common_portals_finalize() + */ +int ompi_common_portals_finalize(void); + + +int ompi_common_portals_error_ptl_to_ompi(int ptl_error); + + +#endif /* OMPI_MCA_COMMON_PORTALS_H */ diff --git a/ompi/mca/common/portals/common_portals_crayxt3.c b/ompi/mca/common/portals/common_portals_crayxt3.c new file mode 100644 index 0000000000..5a7811cf86 --- /dev/null +++ b/ompi/mca/common/portals/common_portals_crayxt3.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include + +#include "opal/util/output.h" +#include "ompi/constants.h" +#include "ompi/proc/proc.h" + + +char * +ompi_common_portals_nodeid(void) +{ + char *ret; + asprintf(&ret, "%5d", cnos_get_rank()); + return ret; +} + + +int +ompi_common_portals_register_mca(void) +{ + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_initialize(void) +{ + int ret, max_interfaces; + + /* + * Initialize Portals interface + */ + ret = PtlInit(&max_interfaces); + if (PTL_OK != ret) { + opal_output(0, "%5d: PtlInit failed, returning %d\n", + cnos_get_rank(), ret); + return OMPI_ERR_NOT_AVAILABLE; + } + + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_ni_initialize(ptl_handle_ni_t *ni_handle) +{ + int ret; + + /* + * Initialize a network device + */ + ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */ + PTL_PID_ANY, /* let library assign our pid */ + NULL, /* no desired limits */ + NULL, /* actual limits */ + ni_handle /* our interface handle */ + ); + if (PTL_OK != ret && PTL_IFACE_DUP != ret) { + opal_output(0, "%5d: PtlNIInit failed, returning %d\n", + cnos_get_rank(), ret); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_get_procs(size_t nprocs, + struct ompi_proc_t **procs, + ptl_process_id_t *portals_procs) +{ + int nptl_procs = 0; + cnos_nidpid_map_t *map; + int i; + + /* + * FIXME - XXX - FIXME + * BWB - implicit assumption that cnos procs list will match our + * procs list. Don't know what to do about that... 
+ */ + nptl_procs = cnos_get_nidpid_map(&map); + if (nptl_procs <= 0) { + opal_output(0, "%5d: cnos_get_nidpid_map() returned %d", + cnos_get_rank(), nptl_procs); + return OMPI_ERR_FATAL; + } else if (nptl_procs != nprocs) { /* cnos map size must match the caller's proc list */ + opal_output(0, "%5d: nptl_procs != nprocs (%d, %d)", cnos_get_rank(), /* rank first, matching the leading %5d */ + nptl_procs, (int) nprocs); /* nprocs is size_t; cast to match %d */ + return OMPI_ERR_FATAL; + } + + for (i = 0 ; i < nprocs ; ++i) { + portals_procs[i].nid = map[i].nid; + portals_procs[i].pid = map[i].pid; + } + + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_ni_finalize(void) +{ + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_finalize(void) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/common/portals/common_portals_utcp.c b/ompi/mca/common/portals/common_portals_utcp.c new file mode 100644 index 0000000000..80f20ddaf5 --- /dev/null +++ b/ompi/mca/common/portals/common_portals_utcp.c @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include +#include +#include + +#include "opal/mca/mca.h" +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" +#include "ompi/proc/proc.h" +#include "ompi/constants.h" +#include "ompi/mca/pml/base/pml_base_module_exchange.h" + +#ifdef __APPLE__ +static char *ptl_ifname = "en0"; +FILE *p3_out = stderr; +#else +static char *ptl_ifname = "eth0"; +#endif + + +/* how's this for source code diving? 
- find private method for + getting interface */ +extern int p3tcp_my_nid(const char *if_str, unsigned int *nid); + +static volatile int32_t usage_count = 0; +static volatile int32_t ni_usage_count = 0; +static bool setup_utcp_params = true; +static bool init_called = false; +static ptl_handle_ni_t active_ni_h = PTL_INVALID_HANDLE; +static mca_base_component_t portals_component = { + MCA_BASE_VERSION_1_0_0, + "common", + MCA_BASE_VERSION_1_0_0, + "portals", + MCA_BASE_VERSION_1_0_0, + NULL, + NULL +}; + + +char * +ompi_common_portals_nodeid(void) +{ + char *ret; + asprintf(&ret, "%5d", getpid()); + return ret; +} + + +int +ompi_common_portals_register_mca(void) +{ + mca_base_param_reg_string(&portals_component, + "ifname", + "Interface name to use for communication", + false, + false, + ptl_ifname, + &ptl_ifname); + + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_initialize(void) +{ + int ret; + ptl_process_id_t info; + + if (OPAL_THREAD_ADD32(&usage_count, 1) > 1) return OMPI_SUCCESS; + + /* if the environment variables for the utcp implementation are + already set, assume the user is running without the full Open + RTE and is doing RTE testing for a more tightly-coupled + platform (like, say, Red Storm). Otherwise, be nice and use + the modex to setup everything for the user */ + if (NULL == getenv("PTL_MY_RID")) { + setup_utcp_params = true; + } else { + setup_utcp_params = false; + } + + if (setup_utcp_params) { + /* Find our contact information and post to registry. Don't + initialize Portals until we have everyone's contact + information. 
*/ + unsigned int nid; + + p3tcp_my_nid(ptl_ifname, &nid); + info.nid = htonl(nid); + info.pid = htonl((ptl_pid_t) getpid()); + + } else { + /* Initialize Portals and publish our assigned contact + information */ + int max_interfaces; + unsigned int nptl_procs, rank; + + ret = PtlInit(&max_interfaces); + if (PTL_OK != ret) { + opal_output(0, "%5d: PtlInit failed, returning %d\n", + getpid(), ret); + return OMPI_ERR_NOT_AVAILABLE; + } + init_called = true; + + /* tell the UTCP runtime code to read the env variables */ + PtlSetRank(PTL_INVALID_HANDLE, -1, -1); + + /* Initialize a network device */ + ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */ + PTL_PID_ANY, /* let library assign our pid */ + NULL, /* no desired limits */ + NULL, /* no need to have limits around */ + &active_ni_h /* our interface handle */ + ); + if (PTL_OK != ret) { + opal_output(0, "%5d: PtlNIInit failed, returning %d\n", + getpid(), ret); + return OMPI_ERR_FATAL; + } + + ret = PtlGetRank(active_ni_h, &rank, &nptl_procs); + if (ret != PTL_OK) { + opal_output(0, "%5d, PtlGetRank() returned %d", + getpid(), ret); + return OMPI_ERR_FATAL; + } + + ret = PtlGetRankId(active_ni_h, rank, &info); + if (ret != PTL_OK) { + opal_output(0, "%5d, PtlGetRankId(rank=%u) returned %d", + getpid(), rank, ret); + return OMPI_ERR_FATAL; + } + } + + ret = mca_pml_base_modex_send(&portals_component, + &info, sizeof(ptl_process_id_t)); + if (OMPI_SUCCESS != ret) { + return ret; + } + + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_ni_initialize(ptl_handle_ni_t *ni_handle) +{ + int ret; + + OPAL_THREAD_ADD32(&ni_usage_count, 1); /* NI reference; released by ompi_common_portals_ni_finalize() */ + if (PTL_INVALID_HANDLE != active_ni_h) { + *ni_handle = active_ni_h; + return OMPI_SUCCESS; + } + + if (setup_utcp_params) { + ompi_proc_t **procs; + int my_rid = 0; + ptl_process_id_t *info; + char *nidmap = NULL, *pidmap = NULL; + char *nid_str, *pid_str; + size_t map_size = 0; + size_t nprocs, size, i; + char *tmp; + ompi_proc_t* proc_self = ompi_proc_local(); + int 
max_interfaces; + + /* get our world */ + procs = ompi_proc_world(&nprocs); + + map_size = nprocs * 12 + 1; /* 12 is max length of long in decimal */ + nidmap = malloc(map_size); + pidmap = malloc(map_size); + nid_str = malloc(12 + 1); + pid_str = malloc(12 + 1); + if (NULL == nidmap || NULL == pidmap || + NULL == nid_str || NULL == pid_str) + return OMPI_ERROR; + + for (i = 0 ; i < nprocs ; ++i) { + if (proc_self == procs[i]) my_rid = i; + + ret = mca_pml_base_modex_recv(&portals_component, + procs[i], (void**) &info, &size); + if (OMPI_SUCCESS != ret) { + opal_output(0, "%5d: mca_pml_base_modex_recv failed: %d", + getpid(), ret); + return ret; + } else if (sizeof(ptl_process_id_t) != size) { + opal_output(0, "%5d: mca_pml_base_modex_recv returned size %d, expected %d", + getpid(), (int) size, (int) sizeof(ptl_process_id_t)); /* size_t values cast to match %d */ + return OMPI_ERROR; + } + + if (i == 0) { + snprintf(nidmap, map_size, "%u", ntohl(info->nid)); + snprintf(pidmap, map_size, "%u", ntohl(info->pid)); + } else { + snprintf(nid_str, 12 + 1, ":%u", ntohl(info->nid)); + snprintf(pid_str, 12 + 1, ":%u", ntohl(info->pid)); + strncat(nidmap, nid_str, 12); + strncat(pidmap, pid_str, 12); + } + + free(info); + } + + asprintf(&tmp, "PTL_MY_RID=%u", my_rid); + putenv(tmp); + asprintf(&tmp, "PTL_NIDMAP=%s", nidmap); + putenv(tmp); + asprintf(&tmp, "PTL_PIDMAP=%s", pidmap); + putenv(tmp); + asprintf(&tmp, "PTL_IFACE=%s", ptl_ifname); + putenv(tmp); + + free(pidmap); + free(nidmap); + free(pid_str); + free(nid_str); + + /* + * Initialize Portals + */ + + ret = PtlInit(&max_interfaces); + if (PTL_OK != ret) { + opal_output(0, "%5d: PtlInit failed, returning %d\n", + getpid(), ret); + return OMPI_ERR_NOT_AVAILABLE; + } + init_called = true; + + /* tell the UTCP runtime code to read the env variables */ + PtlSetRank(PTL_INVALID_HANDLE, -1, -1); + + /* Initialize a network device */ + ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */ + PTL_PID_ANY, /* let library assign our pid */ + NULL, /* no desired 
limits */ + NULL, /* no need to have limits around */ + &active_ni_h /* our interface handle */ + ); + if (PTL_OK != ret) { + opal_output(0, "%5d: PtlNIInit failed, returning %d\n", + getpid(), ret); + return OMPI_ERR_FATAL; + } + + *ni_handle = active_ni_h; + + return OMPI_SUCCESS; + } + + /* shouldn't ever be able to get here */ + return OMPI_ERROR; +} + + +int +ompi_common_portals_get_procs(size_t nprocs, + struct ompi_proc_t **procs, + ptl_process_id_t *portals_procs) +{ + size_t i, size; + int ret; + ptl_process_id_t *info; + + for (i = 0 ; i < nprocs ; ++i) { + ret = mca_pml_base_modex_recv(&portals_component, + procs[i], (void**) &info, &size); + if (OMPI_SUCCESS != ret) { + opal_output(0, "%5d: mca_pml_base_modex_recv failed: %d", + getpid(), ret); + return ret; + } else if (sizeof(ptl_process_id_t) != size) { + opal_output(0, "%5d: mca_pml_base_modex_recv returned size %d, expected %d", + getpid(), (int) size, (int) sizeof(ptl_process_id_t)); /* size_t values cast to match %d */ + return OMPI_ERROR; + } + portals_procs[i].nid = ntohl(info->nid); + portals_procs[i].pid = ntohl(info->pid); + free(info); /* modex_recv buffer is owned by the caller; ompi_common_portals_ni_initialize() frees it the same way */ + } + + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_ni_finalize(void) +{ + if (OPAL_THREAD_ADD32(&ni_usage_count, -1) <= 0) { + if (PTL_INVALID_HANDLE != active_ni_h) { +#if 0 + if (PTL_OK != PtlNIFini(active_ni_h)) { + active_ni_h = PTL_INVALID_HANDLE; + return OMPI_ERROR; + } +#endif + active_ni_h = PTL_INVALID_HANDLE; + } + } + + return OMPI_SUCCESS; +} + + +int +ompi_common_portals_finalize(void) +{ + if (OPAL_THREAD_ADD32(&usage_count, -1) <= 0) { + if (init_called) PtlFini(); + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/common/portals/configure.m4 b/ompi/mca/common/portals/configure.m4 new file mode 100644 index 0000000000..e25a93d0ee --- /dev/null +++ b/ompi/mca/common/portals/configure.m4 @@ -0,0 +1,39 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# MCA_common_portals_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_common_portals_CONFIG],[ + OMPI_CHECK_PORTALS([common_portals], + [common_portals_happy="yes"], + [common_portals_happy="no"]) + + AS_IF([test "$common_portals_happy" = "yes"], + [common_portals_WRAPPER_EXTRA_LDFLAGS="$common_portals_LDFLAGS" + common_portals_WRAPPER_EXTRA_LIBS="$common_portals_LIBS" + $1], + [$2]) + + # substitute in the things needed to build portals + AC_SUBST([common_portals_CPPFLAGS]) + AC_SUBST([common_portals_LDFLAGS]) + AC_SUBST([common_portals_LIBS]) +])dnl diff --git a/ompi/mca/common/portals/configure.params b/ompi/mca/common/portals/configure.params new file mode 100644 index 0000000000..39a4429a02 --- /dev/null +++ b/ompi/mca/common/portals/configure.params @@ -0,0 +1,23 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_INIT_FILE=common_portals.c +PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/mtl/Makefile.am b/ompi/mca/mtl/Makefile.am new file mode 100644 index 0000000000..a5a80be3fd --- /dev/null +++ b/ompi/mca/mtl/Makefile.am @@ -0,0 +1,44 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# main library setup +noinst_LTLIBRARIES = libmca_mtl.la +libmca_mtl_la_SOURCES = + +# header setup +nobase_ompi_HEADERS = +nobase_nodist_ompi_HEADERS = + +# local files +headers = mtl.h +nodist_headers = +libmca_mtl_la_SOURCES += $(headers) $(nodist_headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +nobase_ompi_HEADERS += $(headers) +nobase_nodist_ompi_HEADERS += $(nodist_headers) +ompidir = $(includedir)/openmpi/ompi/mca/mtl +else +ompidir = $(includedir) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h mtl_direct_call.h diff --git a/ompi/mca/mtl/base/Makefile.am b/ompi/mca/mtl/base/Makefile.am new file mode 100644 index 0000000000..1b3b1096ff --- /dev/null +++ b/ompi/mca/mtl/base/Makefile.am @@ -0,0 +1,24 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/base.h + +libmca_mtl_la_SOURCES += \ + base/mtl_base_datatype.c \ + base/mtl_base_component.c diff --git a/ompi/mca/mtl/base/base.h b/ompi/mca/mtl/base/base.h new file mode 100644 index 0000000000..0929de4924 --- /dev/null +++ b/ompi/mca/mtl/base/base.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_MTL_BASE_H +#define MCA_MTL_BASE_H + +#include "ompi_config.h" + +#include "opal/mca/mca.h" +#include "ompi/mca/mtl/mtl.h" + + +/* + * Global functions for the MTL + */ + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +OMPI_DECLSPEC int ompi_mtl_base_open(void); +OMPI_DECLSPEC int ompi_mtl_base_select(bool enable_progress_threads, + bool enable_mpi_threads); +OMPI_DECLSPEC int ompi_mtl_base_close(void); + + +OMPI_DECLSPEC int ompi_mtl_datatype_pack(struct ompi_convertor_t *convertor, + void **buffer, + size_t *buffer_len, + bool *free_after_use); + +OMPI_DECLSPEC int ompi_mtl_datatype_recv_buf(struct ompi_convertor_t *convertor, + void ** buffer, + size_t *buffer_len, + bool *free_on_error); + +OMPI_DECLSPEC int ompi_mtl_datatype_unpack(struct ompi_convertor_t *convertor, + void *buffer, + size_t buffer_len); + +OMPI_DECLSPEC extern opal_list_t ompi_mtl_base_components_opened; + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif /* MCA_MTL_BASE_H */ diff --git a/ompi/mca/mtl/base/mtl_base_component.c b/ompi/mca/mtl/base/mtl_base_component.c new file mode 100644 index 0000000000..5018271281 --- /dev/null +++ b/ompi/mca/mtl/base/mtl_base_component.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "ompi/constants.h"
+#include "ompi/mca/mtl/mtl.h"
+#include "ompi/mca/mtl/base/base.h"
+
+/*
+ * The following file was created by configure.  It contains extern
+ * statements and the definition of an array of pointers to each
+ * component's public mca_base_component_t struct.
+ */
+
+#include "ompi/mca/mtl/base/static-components.h"
+
+
+/* BWB - FIX ME - Properly initialize this */
+int ompi_mtl_base_output = 0;
+opal_list_t ompi_mtl_base_components_opened;
+mca_mtl_base_component_t *ompi_mtl_base_selected_component = NULL;
+mca_mtl_base_module_t *ompi_mtl;
+
+
+/*
+ * Function for finding and opening either all MCA components, or the one
+ * that was specifically requested via a MCA parameter.
+ *
+ * Returns OMPI_SUCCESS, or OMPI_ERROR when the MCA base could not open
+ * the "mtl" components.
+ */
+int
+ompi_mtl_base_open(void)
+{
+    /* Open up all available components */
+
+    if (OMPI_SUCCESS !=
+        mca_base_components_open("mtl", 0, mca_mtl_base_static_components,
+                                 &ompi_mtl_base_components_opened,
+                                 !MCA_mtl_DIRECT_CALL)) {
+        return OMPI_ERROR;
+    }
+
+    /* Set a sentinel in case we don't select any components (e.g.,
+       ompi_info) */
+    ompi_mtl = NULL;
+
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Function for selecting one component from all those that are
+ * available.
+ *
+ * Every opened component that provides an init function is
+ * initialized in list order.  The loop does not stop at the first
+ * success, so when several components initialize successfully the
+ * last one is the one that ends up selected.
+ * NOTE(review): the original comment promised "the first component
+ * that says it can run"; confirm which policy is intended.
+ *
+ * Always returns OMPI_SUCCESS.  When no component could be
+ * initialized, ompi_mtl and ompi_mtl_base_selected_component are left
+ * untouched (NULL after ompi_mtl_base_open()), which is how callers
+ * can detect that case.
+ */
+int
+ompi_mtl_base_select(bool enable_progress_threads,
+                     bool enable_mpi_threads)
+{
+    opal_list_item_t *item = NULL;
+    mca_base_component_list_item_t *cli = NULL;
+    mca_mtl_base_component_t *component = NULL;
+    mca_mtl_base_module_t *module = NULL;
+
+    /* Traverse the list of available components; call their init
+       functions. */
+    for (item = opal_list_get_first(&ompi_mtl_base_components_opened);
+         opal_list_get_end(&ompi_mtl_base_components_opened) != item;
+         item = opal_list_get_next(item) ) {
+        cli = (mca_base_component_list_item_t *) item;
+        component = (mca_mtl_base_component_t *) cli->cli_component;
+
+        if (NULL == component->mtl_init) {
+            opal_output_verbose( 10, ompi_mtl_base_output,
+                                 "select: no init function; ignoring component %s",
+                                 component->mtl_version.mca_component_name );
+            continue;
+        }
+        opal_output_verbose( 10, ompi_mtl_base_output,
+                             "select: initializing %s component %s",
+                             component->mtl_version.mca_type_name,
+                             component->mtl_version.mca_component_name );
+        module = component->mtl_init(enable_progress_threads,
+                                     enable_mpi_threads);
+        if (NULL == module) {
+            opal_output_verbose( 10, ompi_mtl_base_output,
+                                 "select: init returned failure for component %s",
+                                 component->mtl_version.mca_component_name );
+            continue;
+        }
+        opal_output_verbose( 10, ompi_mtl_base_output,
+                             "select: init returned success");
+
+        ompi_mtl_base_selected_component = component;
+        ompi_mtl = module;
+    }
+
+    /* This base function closes, unloads, and removes from the
+       available list all unselected components.  The available list will
+       contain only the selected component. */
+    mca_base_components_close(ompi_mtl_base_output,
+                              &ompi_mtl_base_components_opened,
+                              (mca_base_component_t *) ompi_mtl_base_selected_component);
+
+    /* Only name the selected component when there is one: the log
+       arguments are evaluated unconditionally, so going through a NULL
+       selected-component pointer here is invalid. */
+    if (NULL == ompi_mtl_base_selected_component) {
+        opal_output_verbose( 10, ompi_mtl_base_output,
+                             "select: no component selected" );
+    } else {
+        opal_output_verbose( 10, ompi_mtl_base_output,
+                             "select: component %s selected",
+                             ompi_mtl_base_selected_component->
+                             mtl_version.mca_component_name );
+    }
+
+    /* All done */
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Close whatever is still on the opened-components list.
+ */
+int
+ompi_mtl_base_close(void)
+{
+    /* Close all remaining available modules (may be one if this is a
+       OMPI RTE program, or [possibly] multiple if this is ompi_info) */
+    mca_base_components_close(ompi_mtl_base_output,
+                              &ompi_mtl_base_components_opened, NULL);
+
+    /* All done */
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/mtl/base/mtl_base_datatype.c b/ompi/mca/mtl/base/mtl_base_datatype.c
new file mode 100644
index 0000000000..4cfe1f74ad
--- /dev/null
+++ b/ompi/mca/mtl/base/mtl_base_datatype.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2006 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "opal/mca/mca.h"
+#include "ompi/mca/mtl/mtl.h"
+#include "ompi/mca/mtl/base/base.h"
+#include "ompi/constants.h"
+#include "ompi/datatype/convertor.h"
+
+/*
+ * Pack the data described by the convertor into one contiguous region.
+ *
+ * convertor   (IN)   convertor already prepared for send
+ * buffer      (OUT)  start of the packed data
+ * buffer_len  (OUT)  packed size reported by the convertor
+ * freeAfter   (OUT)  true when *buffer was malloc'ed here and must be
+ *                    freed by the caller
+ *
+ * Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE when the staging
+ * buffer cannot be allocated.
+ */
+int
+ompi_mtl_datatype_pack(struct ompi_convertor_t *convertor,
+                       void **buffer,
+                       size_t *buffer_len,
+                       bool *freeAfter)
+{
+    struct iovec iov;
+    uint32_t iov_count = 1;
+    int32_t free_after;
+    size_t max_data;
+
+    ompi_convertor_get_packed_size(convertor, &max_data);
+    iov.iov_len = max_data;
+
+    if (max_data > 0 && ompi_convertor_need_buffers(convertor)) {
+        iov.iov_base = malloc(max_data);
+        if (NULL == iov.iov_base) return OMPI_ERR_OUT_OF_RESOURCE;
+        *freeAfter = true;
+    } else {
+        /* no staging buffer: iov_base is handed to the convertor as
+           NULL and whatever it holds after the pack is returned */
+        iov.iov_base = NULL;
+        *freeAfter = false;
+    }
+
+    ompi_convertor_pack(convertor, &iov, &iov_count, &max_data,
+                        &free_after);
+
+    *buffer = iov.iov_base;
+    *buffer_len = iov.iov_len;
+
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Choose the buffer an incoming message should be received into.
+ *
+ * convertor      (IN)   convertor already prepared for receive
+ * buffer         (OUT)  malloc'ed staging buffer when the convertor
+ *                       needs one, otherwise the user's buffer offset
+ *                       by the datatype lower bound
+ * buffer_len     (OUT)  packed size reported by the convertor
+ * free_on_error  (OUT)  true when *buffer was malloc'ed here
+ *
+ * Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE when the staging
+ * buffer cannot be allocated (previously a NULL buffer was returned
+ * together with OMPI_SUCCESS).
+ */
+int
+ompi_mtl_datatype_recv_buf(struct ompi_convertor_t *convertor,
+                           void ** buffer,
+                           size_t *buffer_len,
+                           bool *free_on_error)
+{
+    size_t max_data;
+    long lb;
+
+    ompi_convertor_get_packed_size(convertor, &max_data);
+
+    if (max_data > 0 && ompi_convertor_need_buffers(convertor)) {
+        *buffer = malloc(max_data);
+        if (NULL == *buffer) return OMPI_ERR_OUT_OF_RESOURCE;
+        *free_on_error = true;
+    } else {
+        ompi_ddt_type_lb(convertor->pDesc, &lb);
+        *buffer = convertor->pBaseBuf + lb;
+        *free_on_error = false;
+    }
+
+    *buffer_len = max_data;
+
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Unpack a received contiguous region through the convertor.
+ *
+ * convertor   (IN)  convertor already prepared for receive
+ * buffer      (IN)  region obtained from ompi_mtl_datatype_recv_buf()
+ * buffer_len  (IN)  number of bytes available in buffer
+ *
+ * The region is freed when the convertor requires staging buffers and
+ * at least one byte was unpacked.  Returns OMPI_SUCCESS (the function
+ * used to return OMPI_ERROR unconditionally, even after a successful
+ * unpack).
+ */
+int
+ompi_mtl_datatype_unpack(struct ompi_convertor_t *convertor,
+                         void *buffer,
+                         size_t buffer_len)
+{
+    struct iovec iov;
+    uint32_t iov_count = 1;
+    int32_t free_after;
+    size_t max_data;
+
+    iov.iov_len = buffer_len;
+    iov.iov_base = buffer;
+    max_data = iov.iov_len;
+
+    ompi_convertor_unpack(convertor, &iov, &iov_count,
+                          &max_data, &free_after);
+
+    if (max_data > 0 && ompi_convertor_need_buffers(convertor)) {
+        free(buffer);
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git
a/ompi/mca/mtl/mtl.h b/ompi/mca/mtl/mtl.h
new file mode 100644
index 0000000000..823c678929
--- /dev/null
+++ b/ompi/mca/mtl/mtl.h
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+/**
+ * @file
+ *
+ * Matching Transport Layer
+ *
+ * The Matching Transport Layer (MTL) provides device-layer support
+ * for transfer of MPI point-to-point messages over devices that
+ * support hardware / library message matching.  This layer is used
+ * with the MTL PML component to provide lowest latency and highest
+ * bandwidth on given architectures.  Features found in other PML
+ * interfaces, such as message fragmenting, multi-device support, and
+ * NIC failover are not provided by the upper layers.
+ *
+ * In general, this interface should not be used for transport layer
+ * support.  Instead, the BTL interface should be used.  The BTL
+ * interface allows for multiplexing between multiple users
+ * (point-to-point, one-sided, etc.) and provides many features not
+ * found in this interface (RDMA from arbitrary buffers, active
+ * messaging, reasonable pinned memory caching, etc.)
+ */
+
+#ifndef OMPI_MTL_H
+#define OMPI_MTL_H
+
+#include "ompi_config.h"
+#include "mpi.h" /* needed for MPI_ANY_TAG */
+#include "opal/class/opal_list.h"
+#include "opal/mca/mca.h"
+#include "ompi/mca/pml/pml.h" /* for send_mode enum */
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+struct ompi_request_t;
+struct ompi_convertor_t;
+
+struct mca_mtl_base_module_t;
+
+struct mca_mtl_base_endpoint_t;
+
+struct mca_mtl_request_t {
+    /** pointer to associated ompi_request_t */
+    struct ompi_request_t *ompi_req;
+    /** callback invoked with this request as its only argument */
+    void (*completion_callback)(struct mca_mtl_request_t* mtl_request);
+};
+typedef struct mca_mtl_request_t mca_mtl_request_t;
+
+/**
+ * Initialization routine for MTL component
+ *
+ * Initialization routine for MTL component.  This function should
+ * allocate resources for communication and try to do all local setup.
+ * It should not attempt to contact its peers, as that should be
+ * done at add_procs time.  Contact information should be published
+ * during this initialization function.  It will be made available
+ * during add_procs().
+ *
+ * @param enable_progress_threads (IN) Progress threads have been
+ *                  enabled by the user and the component must be
+ *                  capable of making asynchronous progress (either
+ *                  with its own thread, with the kernel, or with
+ *                  the event library).
+ * @param enable_mpi_threads (IN) MPI threads have been enabled by the
+ *                  user and the component must be capable of coping
+ *                  with threads.  If the component can cope with
+ *                  MPI_THREAD_MULTIPLE, enable_mpi_thread_multiple
+ *                  should be set to true.  Otherwise, it is assumed
+ *                  that only THREAD_FUNNELLED and THREAD_SERIALIZED
+ *                  can be used.
+ * @param enable_mpi_thread_multiple (OUT) Component does / does not
+ *                  support MPI_THREAD_MULTIPLE.  This variable only
+ *                  needs to be set if enable_mpi_threads is true.
+ *                  Otherwise, the return value will be ignored.
+ *                  NOTE(review): the function type below has no such
+ *                  argument; confirm whether this paragraph is stale.
+ *
+ * @retval NULL     component can not operate on the current machine
+ * @retval non-NULL component interface function
+ */
+typedef struct mca_mtl_base_module_t*
+(*mca_mtl_base_component_init_fn_t)(bool enable_progress_threads,
+                                    bool enable_mpi_threads);
+
+
+struct mca_mtl_base_component_1_0_0_t {
+    mca_base_component_t mtl_version;
+    mca_base_component_data_1_0_0_t mtl_data;
+    mca_mtl_base_component_init_fn_t mtl_init;
+};
+typedef struct mca_mtl_base_component_1_0_0_t mca_mtl_base_component_1_0_0_t;
+typedef struct mca_mtl_base_component_1_0_0_t mca_mtl_base_component_t;
+
+
+/**
+ * MCA->MTL Clean up any resources held by MTL module
+ *
+ * Opposite of module_init.  Called when communication will no longer
+ * be necessary.  Usually this is during MPI_FINALIZE, but it can be
+ * earlier if the component was not selected to run.  Assuming
+ * module_init was called, finalize will always be called before the
+ * component_close function is called.
+ *
+ * @param mtl (IN)   MTL module returned from call to initialize
+ *
+ * @retval OMPI_SUCCESS cleanup finished successfully
+ * @retval other        failure during cleanup
+ *
+ */
+typedef int (*mca_mtl_base_module_finalize_fn_t)(struct mca_mtl_base_module_t* mtl);
+
+
+/**
+ * PML->MTL notification of change in the process list.
+ *
+ * The mca_mtl_base_module_add_procs_fn_t() is used by the PML to
+ * notify the MTL that new processes are connected to the current
+ * process.  Any addressing information exported by the peer via the
+ * mca_pml_base_modex_send() function should be available during this
+ * call via the corresponding mca_pml_base_modex_recv() function.  The
+ * MTL may utilize this information to determine reachability of each
+ * peer process.
+ *
+ * It is an error for a proc to not be reachable by the given MTL, and
+ * an error should be returned if that case is detected.  The PML
+ * provides the MTL the option to return a pointer to a data structure
+ * defined by the MTL that is passed in with all communication
+ * functions.  The array of procinfo pointers will be allocated by the
+ * PML, but it is up to the MTL module to create the memory for the
+ * procinfo structure itself.  The procinfo structure is opaque to the
+ * PML and is only used internally by the MTL.
+ *
+ * @param mtl (IN)            MTL module
+ * @param nprocs (IN)         Number of processes
+ * @param procs (IN)          Set of processes
+ * @param endpoint (OUT)      Array of (optional) mca_mtl_base_procinfo_t
+ *                            structures, one per proc in procs
+ *                            NOTE(review): the argument is named
+ *                            mtl_peer_data and typed
+ *                            mca_mtl_base_endpoint_t in the signature.
+ *
+ * @retval OMPI_SUCCESS successfully connected to processes
+ * @retval other failure during setup
+ */
+typedef int (*mca_mtl_base_module_add_procs_fn_t)(
+                            struct mca_mtl_base_module_t* mtl,
+                            size_t nprocs,
+                            struct ompi_proc_t** procs,
+                            struct mca_mtl_base_endpoint_t **mtl_peer_data);
+
+
+/**
+ * Notification of change to the process list.
+ *
+ * When the process list changes, the PML notifies the MTL of the
+ * change, to provide the opportunity to cleanup or release any
+ * resources associated with the peer.
+ *
+ * @param mtl (IN)     MTL module
+ * @param nprocs (IN)  Number of processes
+ * @param proc (IN)    Set of processes
+ * @param peer (IN)    Set of peer addressing information.
+ *
+ * @return             Status indicating if cleanup was successful
+ */
+typedef int (*mca_mtl_base_module_del_procs_fn_t)(
+                            struct mca_mtl_base_module_t* mtl,
+                            size_t nprocs,
+                            struct ompi_proc_t** procs,
+                            struct mca_mtl_base_endpoint_t **mtl_peer_data);
+
+
+/**
+ * Blocking send to peer
+ *
+ * Blocking send (Call should not return until the user buffer may be
+ * used again).  Standard MPI semantics must be met by this call, as
+ * mandated in the mode argument.  There is one special mode argument,
+ * MCA_PML_BASE_SEND_COMPLETE, which requires local completion before
+ * the function can return.  This is an optimization for collective
+ * routines that can otherwise lead to degenerate performance for
+ * broadcast-based collectives.
+ *
+ * @param comm (IN)      Communicator used for operation
+ * @param dest (IN)      Destination rank for send (relative to comm)
+ * @param tag (IN)       MPI tag used for sending.  See note below.
+ * @param convertor (IN) Datatype convertor describing send datatype.
+ *                       Already prepared for send.
+ * @param mode (IN)      Mode for send operation
+ *
+ * @return               OMPI_SUCCESS or error value
+ *
+ * \note Open MPI is built around non-blocking operations.  This
+ * function is provided for networks where progressing events outside
+ * of point-to-point (for example, collectives, I/O, one-sided) can
+ * occur without a progress function regularly being triggered.  If
+ * this is not the case for the given network, this function pointer
+ * should be set to NULL and non-blocking sends be used.
+ *
+ * \note While MPI does not allow users to specify negative tags, they
+ * are used internally in Open MPI to provide a unique channel for
+ * collective operations.  Therefore, the MTL can *not* cause an error
+ * if a negative tag is used.
+ */
+typedef int (*mca_mtl_base_module_send_fn_t)(
+                          struct mca_mtl_base_module_t* mtl,
+                          struct ompi_communicator_t *comm,
+                          int dest,
+                          int tag,
+                          struct ompi_convertor_t *convertor,
+                          mca_pml_base_send_mode_t mode);
+
+
+/**
+ * Non-blocking send to peer
+ *
+ * Non-blocking send to peer.  Standard MPI semantics must be met by
+ * this call, as mandated in the mode argument.  There is one special
+ * mode argument, MCA_PML_BASE_SEND_COMPLETE, which requires local
+ * completion before the request is marked as complete.
+ *
+ * The PML will handle creation of the request, leaving the number of
+ * bytes requested in the module structure available for the MTL
+ * directly after the ompi_request_t structure.  The PML will handle
+ * proper destruction of the request once it can safely be destructed
+ * (it has been completed and freed by a call to REQUEST_FREE or
+ * TEST/WAIT).  The MTL should remove all resources associated with
+ * the request when it is marked as completed.
+ *
+ * @param comm (IN)      Communicator used for operation
+ * @param dest (IN)      Destination rank for send (relative to comm)
+ * @param tag (IN)       MPI tag used for sending.  See note below.
+ * @param convertor (IN) Datatype convertor describing send datatype.
+ *                       Already prepared for send.
+ * @param mode (IN)      Mode for send operation (see pml.h)
+ * @param blocking (IN)  True if the call originated from a blocking
+ *                       call, but the PML decided to use a
+ *                       non-blocking operation.  This is either for
+ *                       internal performance decisions or because the
+ *                       blocking send function is NULL.  This is an
+ *                       optimization flag and is not needed for
+ *                       correctness.
+ * @param mtl_request (IN) Pointer to mtl_request.  The ompi_req field
+ *                       will be populated with an initialized
+ *                       ompi_request_t before calling.
+ *
+ * @return               OMPI_SUCCESS or error value
+ *
+ * \note While MPI does not allow users to specify negative tags, they
+ * are used internally in Open MPI to provide a unique channel for
+ * collective operations.  Therefore, the MTL can *not* cause an error
+ * if a negative tag is used.
+ */
+typedef int (*mca_mtl_base_module_isend_fn_t)(
+                          struct mca_mtl_base_module_t* mtl,
+                          struct ompi_communicator_t *comm,
+                          int dest,
+                          int tag,
+                          struct ompi_convertor_t *convertor,
+                          mca_pml_base_send_mode_t mode,
+                          bool blocking,
+                          mca_mtl_request_t *mtl_request);
+
+
+/**
+ * Non-blocking receive
+ *
+ * Non-blocking receive function.  Standard MPI semantics for
+ * MPI_Irecv must be implemented by this call.
+ *
+ * The PML will handle creation of the request, leaving the number of
+ * bytes requested in the module structure available for the MTL,
+ * directly after the ompi_request_t structure.  The PML will handle
+ * proper destruction of the request once it can safely be destroyed
+ * (it has been completed and free'ed by a call to REQUEST_FREE or
+ * TEST/WAIT).  The MTL should remove all resources associated with
+ * the request when it is marked as completed.
+ *
+ * @param comm (IN)      Communicator used for operation
+ * @param src (IN)       Source rank for send (relative to comm)
+ * @param tag (IN)       MPI tag used for sending.  See note below.
+ * @param convertor (IN) Datatype convertor describing receive datatype.
+ *                       Already prepared for receive.
+ * @param mtl_request (IN) Pointer to mtl_request.  The ompi_req field
+ *                       will be populated with an initialized
+ *                       ompi_request_t before calling.
+ *
+ * @return               OMPI_SUCCESS or error value
+ *
+ * \note While MPI does not allow users to specify negative tags, they
+ * are used internally in Open MPI to provide a unique channel for
+ * collective operations.  Therefore, the MTL can *not* cause an error
+ * if a negative tag is used.  Further, MPI_ANY_TAG should *not* match
+ * against negative tags.
+ */
+typedef int (*mca_mtl_base_module_irecv_fn_t)(
+                          struct mca_mtl_base_module_t* mtl,
+                          struct ompi_communicator_t *comm,
+                          int src,
+                          int tag,
+                          struct ompi_convertor_t *convertor,
+                          struct mca_mtl_request_t *mtl_request);
+
+
+/**
+ * Non-blocking probe
+ *
+ * Non-blocking probe function.  Standard MPI semantics for MPI_IPROBE
+ * must be implemented by this call.
+ *
+ * @param comm (IN)      Communicator used for operation
+ * @param src (IN)       Source rank for send (relative to comm)
+ * @param tag (IN)       MPI tag used for sending.  See note below.
+ * @param flag (OUT)     true if message available, false otherwise
+ * @param status (OUT)   Status structure for information on
+ *                       available message
+ *
+ * \note While MPI does not allow users to specify negative tags, they
+ * are used internally in Open MPI to provide a unique channel for
+ * collective operations.  Therefore, the MTL can *not* cause an error
+ * if a negative tag is used.  Further, MPI_ANY_TAG should *not* match
+ * against negative tags.
+ */
+typedef int (*mca_mtl_base_module_iprobe_fn_t)(
+                          struct mca_mtl_base_module_t* mtl,
+                          struct ompi_communicator_t *comm,
+                          int src,
+                          int tag,
+                          int *flag,
+                          struct ompi_status_public_t *status);
+
+
+/**
+ * Cancel an existing request
+ *
+ * Attempt to cancel an existing request.  The (poorly defined)
+ * semantics for MPI_CANCEL must be implemented by this call.  This,
+ * of course, allows the MTL module to do nothing at all.
+ * Implementations of the MTL should make a good faith effort to
+ * cancel receive requests that have not been started, as the "post a
+ * receive for control messages" paradigm is a common one in loosely
+ * coupled MPI applications.
+ *
+ * @param request(IN)    Request that should be cancelled
+ * @param flag           Unknown exactly what this does.
+ *
+ */
+typedef int (*mca_mtl_base_module_cancel_fn_t)(
+                          struct mca_mtl_base_module_t* mtl,
+                          mca_mtl_request_t *mtl_request,
+                          int flag);
+
+
+/**
+ * MTL module interface functions and attributes.
+ */
+struct mca_mtl_base_module_t {
+    int mtl_max_contextid;   /**< maximum allowable contextid */
+    int mtl_max_tag;         /**< maximum tag value.  note that negative tags must be allowed */
+    size_t mtl_request_size; /**< number of bytes to reserve with request structure */
+
+    uint32_t mtl_flags;      /**< flags (put/get...) */
+
+    /* MTL function table */
+    mca_mtl_base_module_add_procs_fn_t mtl_add_procs;
+    mca_mtl_base_module_del_procs_fn_t mtl_del_procs;
+    mca_mtl_base_module_finalize_fn_t mtl_finalize;
+
+    mca_mtl_base_module_send_fn_t mtl_send;
+    mca_mtl_base_module_isend_fn_t mtl_isend;
+    mca_mtl_base_module_irecv_fn_t mtl_irecv;
+    mca_mtl_base_module_iprobe_fn_t mtl_iprobe;
+
+    /* Optional MTL functions */
+    mca_mtl_base_module_cancel_fn_t mtl_cancel;
+};
+typedef struct mca_mtl_base_module_t mca_mtl_base_module_t;
+
+/*
+ * Macro for use in modules that are of type mtl v1.0.0
+ */
+#define MCA_MTL_BASE_VERSION_1_0_0 \
+  /* mtl v1.0 is chained to MCA v1.0 */ \
+  MCA_BASE_VERSION_1_0_0, \
+  /* mtl v1.0 */ \
+  "mtl", 1, 0, 0
+
+/*
+ * macro for doing direct call / call through struct
+ */
+#if MCA_mtl_DIRECT_CALL
+
+#include "ompi/mca/mtl/mtl_direct_call.h"
+
+#define OMPI_MTL_CALL_STAMP(a, b) ompi_mtl_ ## a ## _ ## b
+#define OMPI_MTL_CALL_EXPANDER(a, b) OMPI_MTL_CALL_STAMP(a,b)
+#define OMPI_MTL_CALL(a) OMPI_MTL_CALL_EXPANDER(MCA_mtl_DIRECT_CALL_COMPONENT, a)
+
+#else
+#define OMPI_MTL_CALL(a) ompi_mtl->mtl_ ## a
+#endif
+
+OMPI_DECLSPEC extern mca_mtl_base_module_t *ompi_mtl;
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+#endif
diff --git a/ompi/mca/mtl/mtl_direct_call.h.in b/ompi/mca/mtl/mtl_direct_call.h.in
new file mode 100644
index 0000000000..c926887a57
--- /dev/null
+++ b/ompi/mca/mtl/mtl_direct_call.h.in
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2006      The Regents of the University of California.
+ *                         All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+/*
+ * Template for the MTL direct-call header: when the MTL framework is
+ * built for direct calls, pull in the header of the selected
+ * component.  The guard and macros are the MTL ones; the PML names
+ * this file was copied from would silence this header whenever the PML
+ * direct-call header had already been included.
+ */
+
+#ifndef MCA_MTL_DIRECT_CALL_H_
+#define MCA_MTL_DIRECT_CALL_H_
+
+#if MCA_mtl_DIRECT_CALL
+#include @MCA_mtl_DIRECT_CALL_HEADER@
+#endif
+
+#endif
diff --git a/ompi/mca/mtl/mx/Makefile.am b/ompi/mca/mtl/mx/Makefile.am
new file mode 100644
index 0000000000..8e9a6aa487
--- /dev/null
+++ b/ompi/mca/mtl/mx/Makefile.am
@@ -0,0 +1,62 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2006 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Use the top-level Makefile.options
+
+
+
+AM_CPPFLAGS = $(mtl_mx_CPPFLAGS)
+
+mtl_mx_sources = \
+    mtl_mx.c \
+    mtl_mx.h \
+    mtl_mx_cancel.c \
+    mtl_mx_component.c \
+    mtl_mx_endpoint.c \
+    mtl_mx_endpoint.h \
+    mtl_mx_probe.c \
+    mtl_mx_recv.c \
+    mtl_mx_request.h \
+    mtl_mx_send.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+# Exactly one of the two library names below is non-empty, depending on
+# whether this component is built as a DSO or linked statically.
+if OMPI_BUILD_mtl_mx_DSO
+component_noinst =
+component_install = mca_mtl_mx.la
+else
+component_noinst = libmca_mtl_mx.la
+component_install =
+endif
+
+# DSO flavour: installed into $(libdir)/openmpi
+mcacomponentdir = $(libdir)/openmpi
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_mtl_mx_la_SOURCES = $(mtl_mx_sources)
+mca_mtl_mx_la_LIBADD = \
+    $(mtl_mx_LIBS) \
+    $(top_ompi_builddir)/ompi/libmpi.la \
+    $(top_ompi_builddir)/orte/liborte.la \
+    $(top_ompi_builddir)/opal/libopal.la
+mca_mtl_mx_la_LDFLAGS = -module -avoid-version $(mtl_mx_LDFLAGS)
+
+# Static flavour: not installed on its own
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_mtl_mx_la_SOURCES = $(mtl_mx_sources)
+libmca_mtl_mx_la_LIBADD = $(mtl_mx_LIBS)
+libmca_mtl_mx_la_LDFLAGS = -module -avoid-version $(mtl_mx_LDFLAGS)
diff --git a/ompi/mca/mtl/mx/configure.m4 b/ompi/mca/mtl/mx/configure.m4
new file mode 100644
index 0000000000..00220a87e4
--- /dev/null
+++ b/ompi/mca/mtl/mx/configure.m4
@@ -0,0 +1,41 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+
+# MCA_mtl_mx_CONFIG([action-if-can-compile],
+#                   [action-if-cant-compile])
+# ------------------------------------------------
+# Runs OMPI_CHECK_MX with the mtl_mx prefix.  On success the detected
+# LDFLAGS / LIBS are copied into the wrapper-compiler variables and
+# action-if-can-compile is run; otherwise action-if-cant-compile.
+AC_DEFUN([MCA_mtl_mx_CONFIG],[
+    OMPI_CHECK_MX([mtl_mx],
+                  [mtl_mx_happy="yes"],
+                  [mtl_mx_happy="no"])
+
+    AS_IF([test "$mtl_mx_happy" = "yes"],
+          [mtl_mx_WRAPPER_EXTRA_LDFLAGS="$mtl_mx_LDFLAGS"
+           mtl_mx_WRAPPER_EXTRA_LIBS="$mtl_mx_LIBS"
+           $1],
+          [$2])
+
+    # substitute in the things needed to build mx
+    AC_SUBST([mtl_mx_CFLAGS])
+    AC_SUBST([mtl_mx_CPPFLAGS])
+    AC_SUBST([mtl_mx_LDFLAGS])
+    AC_SUBST([mtl_mx_LIBS])
+])dnl
+
diff --git a/ompi/mca/mtl/mx/configure.params b/ompi/mca/mtl/mx/configure.params
new file mode 100644
index 0000000000..0369ba5a6f
--- /dev/null
+++ b/ompi/mca/mtl/mx/configure.params
@@ -0,0 +1,21 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Specific to this module
+PARAM_CONFIG_FILES="Makefile"
diff --git a/ompi/mca/mtl/mx/mtl_mx.c b/ompi/mca/mtl/mx/mtl_mx.c
new file mode 100644
index 0000000000..a0b099f8dd
--- /dev/null
+++ b/ompi/mca/mtl/mx/mtl_mx.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2006 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "ompi/mca/mtl/mtl.h"
+#include "mtl_mx.h"
+#include "ompi/communicator/communicator.h"
+#include "opal/class/opal_list.h"
+#include "ompi/mca/pml/base/pml_base_module_exchange.h"
+#include "mtl_mx_endpoint.h"
+#include "mtl_mx_request.h"
+
+/* The single MX module instance; only the base-module part is
+   initialized statically. */
+mca_mtl_mx_module_t ompi_mtl_mx = {
+    {
+        8191,        /* max cid - 2^13 - 1 */
+        (1UL << 30), /* max tag value - must allow negatives */
+        0,           /* request reserve space */
+        0,           /* flags */
+
+
+        ompi_mtl_mx_add_procs,
+        ompi_mtl_mx_del_procs,
+        ompi_mtl_mx_finalize,
+
+        NULL,        /* no blocking send; isend is used instead */
+        ompi_mtl_mx_isend,
+
+        ompi_mtl_mx_irecv,
+        ompi_mtl_mx_iprobe,
+
+        ompi_mtl_mx_cancel
+    }
+};
+
+int ompi_mtl_mx_progress( void );
+
+/*
+ * Open the local MX endpoint, publish its address through the modex
+ * and register the progress function.
+ *
+ * Returns OMPI_SUCCESS, or OMPI_ERROR when any MX call fails.
+ */
+int ompi_mtl_mx_module_init(void){
+    mx_param_t mx_param;
+    mx_return_t mx_return;
+
+
+    /* setup params */
+    /* NOTE(review): mx_param is filled in but mx_open_endpoint() below
+       is still called with no parameter array, as in the original code;
+       confirm whether the unexpected-queue limit is meant to be
+       passed. */
+    mx_param.key = MX_PARAM_UNEXP_QUEUE_MAX;
+    mx_param.val.unexp_queue_max = ompi_mtl_mx.mx_unexp_queue_max;
+
+
+    /* get a local endpoint */
+    mx_return = mx_open_endpoint(MX_ANY_NIC,
+                                 MX_ANY_ENDPOINT,
+                                 ompi_mtl_mx.mx_filter,
+                                 NULL,
+                                 0,
+                                 &ompi_mtl_mx.mx_endpoint);
+
+
+    if(mx_return != MX_SUCCESS) {
+        opal_output(0, "Error in mx_open_endpoint (error %s)\n", mx_strerror(mx_return));
+        return OMPI_ERROR;
+    }
+
+    /* get the endpoint address */
+    mx_return = mx_get_endpoint_addr( ompi_mtl_mx.mx_endpoint,
+                                      &ompi_mtl_mx.mx_endpoint_addr);
+
+    if(mx_return != MX_SUCCESS) {
+        opal_output(0, "Error in mx_get_endpoint_addr (error %s)\n", mx_strerror(mx_return));
+        return OMPI_ERROR;
+    }
+
+    mx_return = mx_decompose_endpoint_addr( ompi_mtl_mx.mx_endpoint_addr, &(ompi_mtl_mx.mx_addr.nic_id),
+                                            &(ompi_mtl_mx.mx_addr.endpoint_id) );
+
+    if(mx_return != MX_SUCCESS) {
+        opal_output(0, "Error in mx_decompose_endpoint_addr (error %s)\n", mx_strerror(mx_return));
+        return OMPI_ERROR;
+    }
+
+
+
+    /* publish the (nic id, endpoint id) pair for the peers */
+    mca_pml_base_modex_send( &mca_mtl_mx_component.super.mtl_version,
+                             &ompi_mtl_mx.mx_addr,
+                             sizeof(mca_mtl_mx_addr_t));
+
+    /* register the mtl mx progress function */
+    opal_progress_register(ompi_mtl_mx_progress);
+
+
+    /* every MX failure has already returned above */
+    return OMPI_SUCCESS;
+
+
+}
+
+/*
+ * Unregister the progress function and shut the MX library down.
+ */
+int
+ompi_mtl_mx_finalize(struct mca_mtl_base_module_t* mtl) {
+    mx_return_t mx_return;
+
+    opal_progress_unregister(ompi_mtl_mx_progress);
+
+    /* free resources */
+
+    mx_return = mx_finalize();
+    if(mx_return != MX_SUCCESS){
+        opal_output(0, "Error in mx_finalize (error %s)\n", mx_strerror(mx_return));
+        return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Create one MX endpoint object per process and store it in the
+ * matching slot of mtl_peer_data.  Returns OMPI_ERROR as soon as one
+ * endpoint cannot be created.
+ */
+int
+ompi_mtl_mx_add_procs(struct mca_mtl_base_module_t *mtl,
+                      size_t nprocs,
+                      struct ompi_proc_t** procs,
+                      struct mca_mtl_base_endpoint_t **mtl_peer_data)
+{
+    int i;
+    assert(mtl == &ompi_mtl_mx.super);
+
+    for( i = 0; i < (int) nprocs; i++ ){
+        mca_mtl_mx_endpoint_t* mtl_mx_endpoint =
+            mca_mtl_mx_endpoint_create(procs[i]);
+        if(NULL == mtl_mx_endpoint) {
+            return OMPI_ERROR;
+        }
+        mtl_peer_data[i] = (struct mca_mtl_base_endpoint_t*)
+            mtl_mx_endpoint;
+    }
+
+    return OMPI_SUCCESS;
+}
+
+
+/* Nothing is released here; always reports success. */
+int
+ompi_mtl_mx_del_procs(struct mca_mtl_base_module_t *mtl,
+                      size_t nprocs,
+                      struct ompi_proc_t** procs,
+                      struct mca_mtl_base_endpoint_t **mtl_peer_data)
+{
+    return OMPI_SUCCESS;
+}
+
+
+
+/*
+ * Progress function registered with opal_progress: complete at most
+ * one MX request per call.
+ *
+ * Returns the number of requests completed by this call (0 or 1); it
+ * used to return 1 even when nothing had completed.
+ */
+int ompi_mtl_mx_progress( void ) {
+    mx_return_t mx_return;
+    mx_request_t mx_request;
+    mx_status_t mx_status;
+    uint32_t result = 0;
+    mca_mtl_mx_request_t* mtl_mx_request;
+
+    mx_return = mx_ipeek(ompi_mtl_mx.mx_endpoint,
+                         &mx_request,
+                         &result);
+
+    if(mx_return != MX_SUCCESS) {
+        opal_output(0, "Error in mx_ipeek (error %s)\n", mx_strerror(mx_return));
+        /* result and mx_request cannot be trusted after a failed peek */
+        return 0;
+    }
+    if(0 == result) {
+        /* nothing has completed */
+        return 0;
+    }
+
+    mx_return = mx_test(ompi_mtl_mx.mx_endpoint,
+                        &mx_request,
+                        &mx_status,
+                        &result);
+    if(mx_return != MX_SUCCESS) {
+        opal_output(0, "Error in mx_test (error %s)\n", mx_strerror(mx_return));
+        abort();
+    }
+    if(0 == result) {
+        opal_output(0, "Error in ompi_mtl_mx_progress, mx_ipeek returned a request, mx_test on the request resulted failure.\n");
+        abort();
+    }
+    if(mx_status.code != MX_STATUS_SUCCESS) {
+        /* print the status code; the whole struct used to be passed
+           to the %d conversion */
+        opal_output(0, "Error in ompi_mtl_mx_progress, mx_test returned something other than MX_STATUS_SUCCESS: mx_status(%d).\n",
+                    (int) mx_status.code);
+        abort();
+    }
+    mtl_mx_request = (mca_mtl_mx_request_t*) mx_status.context;
+    if(OMPI_MTL_MX_ISEND == mtl_mx_request->type) {
+        if(mtl_mx_request->free_after) {
+            free(mtl_mx_request->mx_segment[0].segment_ptr);
+        }
+        mtl_mx_request->super.completion_callback(&mtl_mx_request->super);
+    }
+    if(OMPI_MTL_MX_IRECV == mtl_mx_request->type) {
+        ompi_mtl_datatype_unpack(mtl_mx_request->convertor,
+                                 mtl_mx_request->mx_segment[0].segment_ptr,
+                                 mtl_mx_request->mx_segment[0].segment_length);
+        /* set the status */
+        MX_GET_SRC(mx_status.match_info,
+                   mtl_mx_request->super.ompi_req->req_status.MPI_SOURCE);
+        MX_GET_TAG(mx_status.match_info,
+                   mtl_mx_request->super.ompi_req->req_status.MPI_TAG);
+        mtl_mx_request->super.ompi_req->req_status._count =
+            mx_status.xfer_length;
+
+        /* NOTE(review): any code other than MX_STATUS_SUCCESS aborts
+           above, so the TRUNCATED and default cases cannot be reached
+           at present. */
+        switch (mx_status.code) {
+        case MX_STATUS_SUCCESS:
+            mtl_mx_request->super.ompi_req->req_status.MPI_ERROR =
+                OMPI_SUCCESS;
+            break;
+        case MX_STATUS_TRUNCATED:
+            mtl_mx_request->super.ompi_req->req_status.MPI_ERROR =
+                MPI_ERR_TRUNCATE;
+            break;
+        default:
+            mtl_mx_request->super.ompi_req->req_status.MPI_ERROR =
+                MPI_ERR_INTERN;
+        }
+        mtl_mx_request->super.completion_callback(&mtl_mx_request->super);
+    }
+
+    return 1;
+}
diff --git a/ompi/mca/mtl/mx/mtl_mx.h b/ompi/mca/mtl/mx/mtl_mx.h
new file mode 100644
index 0000000000..d6d60cf905
--- /dev/null
+++ b/ompi/mca/mtl/mx/mtl_mx.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MTL_MX_H_HAS_BEEN_INCLUDED +#define MTL_MX_H_HAS_BEEN_INCLUDED + +#include "opal/threads/threads.h" +#include "opal/threads/condition.h" +#include "ompi/class/ompi_free_list.h" +#include "opal/util/cmd_line.h" +#include "ompi/request/request.h" +#include "ompi/mca/mtl/mtl.h" +#include "ompi/mca/mtl/base/base.h" +#include "ompi/datatype/datatype.h" +#include "ompi/datatype/convertor.h" +#include "mtl_mx_endpoint.h" + +#include "myriexpress.h" + + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + + +/* MTL interface functions */ +extern int ompi_mtl_mx_add_procs(struct mca_mtl_base_module_t* mtl, + size_t nprocs, + struct ompi_proc_t** procs, + struct mca_mtl_base_endpoint_t **mtl_peer_data); + +extern int ompi_mtl_mx_del_procs(struct mca_mtl_base_module_t* mtl, + size_t nprocs, + struct ompi_proc_t** procs, + struct mca_mtl_base_endpoint_t **mtl_peer_data); + +extern int ompi_mtl_mx_isend(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct ompi_convertor_t *convertor, + mca_pml_base_send_mode_t mode, + bool blocking, + mca_mtl_request_t * mtl_request); + +extern int ompi_mtl_mx_irecv(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + struct ompi_convertor_t *convertor, + struct mca_mtl_request_t *mtl_request); + + +extern int ompi_mtl_mx_iprobe(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + int *flag, + struct ompi_status_public_t *status); + +extern int 
ompi_mtl_mx_cancel(struct mca_mtl_base_module_t* mtl, + struct mca_mtl_request_t *mtl_request, + int flag); + +extern int ompi_mtl_mx_finalize(struct mca_mtl_base_module_t* mtl); + +int ompi_mtl_mx_module_init(void); + +/** + * MTL Module Interface + */ +struct mca_mtl_mx_module_t { + mca_mtl_base_module_t super; /**< base MTL interface */ + int32_t mx_unexp_queue_max; /**< maximium size of the MX unexpected message queue */ + int32_t mx_filter; /**< user assigned value used to filter incomming messages */ + int32_t mx_timeout; + int32_t mx_retries; + int32_t mx_support_sharedmem; + mx_endpoint_t mx_endpoint; /**< mx data structure for local endpoint */ + mx_endpoint_addr_t mx_endpoint_addr; /**< mx data structure for local endpoint address */ + mca_mtl_mx_addr_t mx_addr; +}; +typedef struct mca_mtl_mx_module_t mca_mtl_mx_module_t; + +extern mca_mtl_mx_module_t ompi_mtl_mx; + +struct mca_mtl_mx_component_t{ + mca_mtl_base_component_1_0_0_t super; /**< base MTL component */ +}; +typedef struct mca_mtl_mx_component_t mca_mtl_mx_component_t; + +extern mca_mtl_mx_component_t mca_mtl_mx_component; + + +/* match/ignore bit manipulation + * + * 01234567 01234567 01234567 01234567 01234567 01234567 01234567 01234567 + * | | + * context id | source | message tag + * | | + */ + +#define MX_CONTEXT_MASK 0xFFFF000000000000 +#define MX_SOURCE_MASK 0x0000FFFF00000000 +#define MX_TAG_MASK 0x00000000FFFFFFFF + +#define MX_CONTEXT_IGNR MX_CONTEXT_MASK +#define MX_SOURCE_IGNR MX_SOURCE_MASK +#define MX_TAG_IGNR 0x00000000EFFFFFFF + +/* get the tag from the bits */ +#define MX_GET_TAG(match_bits, tag) \ +{ \ + tag = (int) (match_bits & MX_TAG_MASK); \ +} + + +/* get the tag from the bits */ +#define MX_GET_SRC(match_bits, src) \ +{ \ + src = (int) ((match_bits & MX_SOURCE_MASK) >> 32); \ +} + +/* send posting */ +#define MX_SET_SEND_BITS(match_bits, contextid, source, tag) \ +{ \ + match_bits = contextid; \ + match_bits = (match_bits << 16); \ + match_bits |= source; \ + match_bits = 
(match_bits << 32); \ + match_bits |= (MX_TAG_MASK & tag); \ +} + +/* receive posting */ +#define MX_SET_RECV_BITS(match_bits, mask_bits, contextid, source, tag) \ +{ \ + match_bits = mask_bits = 0; \ + match_bits = contextid; \ + match_bits = (match_bits << 16); \ + \ + if (MPI_ANY_SOURCE == source) { \ + match_bits = (match_bits << 32); \ + mask_bits |= MX_SOURCE_IGNR; \ + } else { \ + match_bits |= source; \ + match_bits = (match_bits << 32); \ + } \ + \ + if (MPI_ANY_TAG == tag) { \ + mask_bits |= MX_TAG_IGNR; \ + } else { \ + match_bits |= (MX_TAG_MASK & tag); \ + } \ + \ + mask_bits = ~mask_bits; \ +} + + + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif /* MTL_MX_H_HAS_BEEN_INCLUDED */ + diff --git a/ompi/mca/mtl/mx/mtl_mx_cancel.c b/ompi/mca/mtl/mx/mtl_mx_cancel.c new file mode 100644 index 0000000000..eea4464e18 --- /dev/null +++ b/ompi/mca/mtl/mx/mtl_mx_cancel.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/request/request.h" +#include "mtl_mx.h" + + +int ompi_mtl_mx_cancel(struct mca_mtl_base_module_t* mtl, + struct mca_mtl_request_t *mtl_request, + int flag) { + + return OMPI_ERR_NOT_IMPLEMENTED; +} diff --git a/ompi/mca/mtl/mx/mtl_mx_component.c b/ompi/mca/mtl/mx/mtl_mx_component.c new file mode 100644 index 0000000000..e4d9a02049 --- /dev/null +++ b/ompi/mca/mtl/mx/mtl_mx_component.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/event/event.h" +#include "opal/mca/base/mca_base_param.h" +#include "ompi/datatype/convertor.h" + +#include "mtl_mx.h" +#include "mtl_mx_request.h" + +#include "myriexpress.h" + +#define MCA_MTL_MX_QUEUE_LENGTH_MAX 2*1024*1024 +static int ompi_mtl_mx_component_open(void); +static int ompi_mtl_mx_component_close(void); + +static mca_mtl_base_module_t* ompi_mtl_mx_component_init( bool enable_progress_threads, + bool enable_mpi_threads ); + +mca_mtl_mx_component_t mca_mtl_mx_component = { + + { + /* First, the mca_base_component_t struct containing meta + * information about the component itself */ + + { + /* Indicate that we are a mtl v1.0.0 component (which also implies + * a specific MCA version) */ + + MCA_MTL_BASE_VERSION_1_0_0, + + "mx", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + ompi_mtl_mx_component_open, /* component open */ + ompi_mtl_mx_component_close /* component close */ + }, + + /* Next the MCA v1.0.0 component meta data */ + + { + /* Whether the component is checkpointable or not */ + false + }, + + ompi_mtl_mx_component_init /* component init */ + } +}; + + +static int +ompi_mtl_mx_component_open(void) +{ + + + mca_base_param_reg_int(&mca_mtl_mx_component.super.mtl_version, "filter", + "user assigned value used to filter incomming messages", + false, false, 0xaaaaffff, &ompi_mtl_mx.mx_filter); + + mca_base_param_reg_int(&mca_mtl_mx_component.super.mtl_version, "timeout", + "Timeout for connections", + false, false, 1000, &ompi_mtl_mx.mx_timeout); + + mca_base_param_reg_int(&mca_mtl_mx_component.super.mtl_version, "retries", + "Number of retries for each new connection before considering the peer as unreacheable", + false, false, 20, &ompi_mtl_mx.mx_retries); + + 
mca_base_param_reg_int(&mca_mtl_mx_component.super.mtl_version, "shared_mem", + "Enable the MX support for shared memory", + false, true, 1, &ompi_mtl_mx.mx_support_sharedmem ); + + mca_base_param_reg_int(&mca_mtl_mx_component.super.mtl_version, "unexpected_queue_length", + "Length of MX unexpected message queue", + false, false, MCA_MTL_MX_QUEUE_LENGTH_MAX, &ompi_mtl_mx.mx_unexp_queue_max); + + if(ompi_mtl_mx.mx_unexp_queue_max > MCA_MTL_MX_QUEUE_LENGTH_MAX) { + ompi_mtl_mx.mx_unexp_queue_max = MCA_MTL_MX_QUEUE_LENGTH_MAX; + } + return OMPI_SUCCESS; + +} + + +static int +ompi_mtl_mx_component_close(void) +{ + return OMPI_SUCCESS; +} + + +static mca_mtl_base_module_t* +ompi_mtl_mx_component_init(bool enable_progress_threads, + bool enable_mpi_threads) +{ + mx_return_t mx_return; + + /* initialize the mx library */ + mx_return = mx_init(); + + if(mx_return!=MX_SUCCESS) { + opal_output(0, "Error in mx_init (error %s)\n", mx_strerror(mx_return)); + return NULL; + } + + + ompi_mtl_mx_module_init(); + + ompi_mtl_mx.super.mtl_request_size = + sizeof(mca_mtl_mx_request_t) - + sizeof(struct mca_mtl_request_t); + + return &ompi_mtl_mx.super; +} + diff --git a/ompi/mca/mtl/mx/mtl_mx_endpoint.c b/ompi/mca/mtl/mx/mtl_mx_endpoint.c new file mode 100644 index 0000000000..8db46e17f8 --- /dev/null +++ b/ompi/mca/mtl/mx/mtl_mx_endpoint.c @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include +#include +#include "ompi/types.h" +#include "orte/mca/ns/base/base.h" +#include "orte/mca/oob/base/base.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/errmgr/errmgr.h" +#include "opal/util/output.h" +#include "mtl_mx.h" +#include "mtl_mx_endpoint.h" +#include "ompi/mca/pml/base/pml_base_module_exchange.h" + +/* + * Initialize state of the endpoint instance. + * + */ + +static void mca_mtl_mx_endpoint_construct(mca_mtl_mx_endpoint_t* endpoint) +{ + endpoint->mtl_mx_module = NULL; + endpoint->mx_peer = NULL; +} + +/* + * Destroy a endpoint + * + */ + +static void mca_mtl_mx_endpoint_destruct(mca_mtl_mx_endpoint_t* endpoint) +{ +} + + +OBJ_CLASS_INSTANCE( + mca_mtl_mx_endpoint_t, + opal_list_item_t, + mca_mtl_mx_endpoint_construct, + mca_mtl_mx_endpoint_destruct); + + + + + +mca_mtl_mx_endpoint_t* mca_mtl_mx_endpoint_create(ompi_proc_t* ompi_proc) { + mca_mtl_mx_endpoint_t* mtl_mx_endpoint = NULL; + int rc; + mca_mtl_mx_addr_t *mx_peer; + size_t size; + mx_return_t mx_return; + int num_retry = 0; + /* get the remote proc's address (only one) */ + rc = mca_pml_base_modex_recv(&mca_mtl_mx_component.super.mtl_version, + ompi_proc, (void**)&mx_peer, &size); + if( rc != OMPI_SUCCESS || size != sizeof(mca_mtl_mx_addr_t)) { + return NULL; + } + + mtl_mx_endpoint = (mca_mtl_mx_endpoint_t*) OBJ_NEW(mca_mtl_mx_endpoint_t); + mtl_mx_endpoint->mx_peer = mx_peer; + + retry_connect: + mx_return = mx_connect(ompi_mtl_mx.mx_endpoint, + mx_peer->nic_id, + mx_peer->endpoint_id, + ompi_mtl_mx.mx_filter, + ompi_mtl_mx.mx_timeout, + &mtl_mx_endpoint->mx_peer_addr); + if(MX_SUCCESS != mx_return) { + char peer_name[MX_MAX_HOSTNAME_LEN]; + if(MX_TIMEOUT == mx_return) { + if( num_retry++ < ompi_mtl_mx.mx_retries ) { + goto retry_connect; + } + } + + if(MX_SUCCESS != mx_nic_id_to_hostname( mx_peer->nic_id, peer_name)) { + sprintf( peer_name, "unknown %lx nic_id", 
(long)mx_peer->nic_id ); + } + opal_output( 0, "mx_connect fail for %s(%dth remote address) with key %x (error %s)\n", + peer_name, ompi_mtl_mx.mx_filter, mx_strerror(mx_return) ); + return NULL; + } + + + return mtl_mx_endpoint; + +} diff --git a/ompi/mca/mtl/mx/mtl_mx_endpoint.h b/ompi/mca/mtl/mx/mtl_mx_endpoint.h new file mode 100644 index 0000000000..b37141a1af --- /dev/null +++ b/ompi/mca/mtl/mx/mtl_mx_endpoint.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_MTL_MX_ENDPOINT_H +#define MCA_MTL_MX_ENDPOINT_H + +#include "opal/class/opal_list.h" +#include "opal/event/event.h" +#include "ompi/proc/proc.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/mtl/mtl.h" +#include "mtl_mx.h" + +#include "myriexpress.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +OBJ_CLASS_DECLARATION(mca_mtl_mx_endpoint_t); + +/** + * Structure used to publish MX information to peers + */ +struct mca_mtl_mx_addr_t { + uint64_t nic_id; + uint32_t endpoint_id; +}; +typedef struct mca_mtl_mx_addr_t mca_mtl_mx_addr_t; + +/** + * An abstraction that represents a connection to a endpoint process. + * An instance of mca_mtl_base_endpoint_t is associated w/ each process + * and MTL pair at startup. 
However, connections to the endpoint + * are established dynamically on an as-needed basis: + */ + +struct mca_mtl_base_endpoint_t { + opal_list_item_t super; + + struct mca_mtl_mx_module_t* mtl_mx_module; + /**< MTL instance that created this connection */ + + struct mca_mtl_mx_addr_t* mx_peer; + /** the address as reported by the peer */ + + mx_endpoint_addr_t mx_peer_addr; + /** the remote MX endpoint address */ + + +}; +typedef struct mca_mtl_base_endpoint_t mca_mtl_base_endpoint_t; +typedef mca_mtl_base_endpoint_t mca_mtl_mx_endpoint_t; +OBJ_CLASS_DECLARATION(mca_mtl_mx_endpoint); + +mca_mtl_mx_endpoint_t* mca_mtl_mx_endpoint_create(ompi_proc_t*); + + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/ompi/mca/mtl/mx/mtl_mx_probe.c b/ompi/mca/mtl/mx/mtl_mx_probe.c new file mode 100644 index 0000000000..642c04f57a --- /dev/null +++ b/ompi/mca/mtl/mx/mtl_mx_probe.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/request/request.h" +#include "mtl_mx.h" + + +int ompi_mtl_mx_iprobe(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + int *flag, + struct ompi_status_public_t *status) +{ + return OMPI_ERR_NOT_IMPLEMENTED; +} diff --git a/ompi/mca/mtl/mx/mtl_mx_recv.c b/ompi/mca/mtl/mx/mtl_mx_recv.c new file mode 100644 index 0000000000..e6f2486235 --- /dev/null +++ b/ompi/mca/mtl/mx/mtl_mx_recv.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" +#include "ompi/request/request.h" +#include "ompi/datatype/datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/convertor.h" + +#include "mtl_mx.h" +#include "mtl_mx_request.h" + +int +ompi_mtl_mx_irecv(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + struct ompi_convertor_t *convertor, + struct mca_mtl_request_t *mtl_request) +{ + int ret; + mx_return_t mx_return; + mca_mtl_mx_request_t * mtl_mx_request = (mca_mtl_mx_request_t*) mtl_request; + uint64_t match_bits; + uint64_t mask_bits; + size_t length; + + ret = ompi_mtl_datatype_recv_buf(convertor, + &mtl_mx_request->mx_segment[0].segment_ptr, + &length, + &mtl_mx_request->free_after); + + mtl_mx_request->mx_segment[0].segment_length = length; + mtl_mx_request->convertor = convertor; + mtl_mx_request->type = OMPI_MTL_MX_IRECV; + + if(OMPI_SUCCESS != ret) return ret; + + + MX_SET_RECV_BITS(match_bits, + mask_bits, + comm->c_contextid, + src, + tag); + +#if 0 + printf("recv bits: 0x%016llx 0x%016llx\n", match_bits, mask_bits); +#endif + mx_return = mx_irecv( ompi_mtl_mx.mx_endpoint, + mtl_mx_request->mx_segment, + 1, + match_bits, + mask_bits, + mtl_mx_request, + &mtl_mx_request->mx_request); + + + if(mx_return != MX_SUCCESS) { + opal_output(0, "Error in mx_irecv (error %s)\n", mx_strerror(mx_return)); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + diff --git a/ompi/mca/btl/portals/btl_portals_compat.h b/ompi/mca/mtl/mx/mtl_mx_request.h similarity index 61% rename from ompi/mca/btl/portals/btl_portals_compat.h rename to ompi/mca/mtl/mx/mtl_mx_request.h index 41303d95e0..9e337093dd 100644 --- a/ompi/mca/btl/portals/btl_portals_compat.h +++ b/ompi/mca/mtl/mx/mtl_mx_request.h @@ -16,29 +16,25 @@ * $HEADER$ */ +#ifndef OMPI_MTL_MX_REQUEST_H +#define OMPI_MTL_MX_REQUEST_H -#ifndef OMPI_BTL_PORTALS_COMPAT_H -#define 
OMPI_BTL_PORTALS_COMPAT_H +#include "ompi/datatype/convertor.h" -#if OMPI_BTL_PORTALS_UTCP -#include +typedef enum { + OMPI_MTL_MX_ISEND, + OMPI_MTL_MX_IRECV +} mca_mtl_mx_request_type_t; -#include -#include -#include -#include - -#elif OMPI_BTL_PORTALS_REDSTORM - -#include - -#define PTL_EQ_HANDLER_NONE NULL - -#else - -#error "Unknown Portals library configuration" +struct mca_mtl_mx_request_t { + struct mca_mtl_request_t super; + mx_request_t mx_request; + mx_segment_t mx_segment[1]; + struct ompi_convertor_t *convertor; + bool free_after; + mca_mtl_mx_request_type_t type; +}; +typedef struct mca_mtl_mx_request_t mca_mtl_mx_request_t; #endif - -#endif /* OMPI_BTL_PORTALS_NAL_H */ diff --git a/ompi/mca/mtl/mx/mtl_mx_send.c b/ompi/mca/mtl/mx/mtl_mx_send.c new file mode 100644 index 0000000000..f0b39e0593 --- /dev/null +++ b/ompi/mca/mtl/mx/mtl_mx_send.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/datatype/datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/convertor.h" + +#include "mtl_mx.h" +#include "mtl_mx_request.h" + + +int +ompi_mtl_mx_isend(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct ompi_convertor_t *convertor, + mca_pml_base_send_mode_t mode, + bool blocking, + mca_mtl_request_t * mtl_request) +{ + mx_return_t mx_return; + uint64_t match_bits; + int ret; + mca_mtl_mx_request_t * mtl_mx_request = (mca_mtl_mx_request_t*) mtl_request; + size_t length; + + +mca_mtl_mx_endpoint_t* mx_endpoint = + (mca_mtl_mx_endpoint_t*) comm->c_pml_procs[dest]->proc_ompi->proc_pml; + + assert(mtl == &ompi_mtl_mx.super); + + MX_SET_SEND_BITS(match_bits, comm->c_contextid, comm->c_my_rank, tag); + + ret = ompi_mtl_datatype_pack(convertor, + &mtl_mx_request->mx_segment[0].segment_ptr, + &length, + &mtl_mx_request->free_after); + mtl_mx_request->mx_segment[0].segment_length = length; + mtl_mx_request->convertor = convertor; + mtl_mx_request->type = OMPI_MTL_MX_ISEND; + + if (OMPI_SUCCESS != ret) return ret; + + if(mode == MCA_PML_BASE_SEND_SYNCHRONOUS) { + +#if 0 + printf("issend bits: 0x%016llx\n", match_bits); +#endif + mx_return = mx_issend( ompi_mtl_mx.mx_endpoint, + mtl_mx_request->mx_segment, + 1, + mx_endpoint->mx_peer_addr, + match_bits, + mtl_mx_request, + &mtl_mx_request->mx_request + ); + if(mx_return != MX_SUCCESS ) { + char peer_name[MX_MAX_HOSTNAME_LEN]; + if(MX_SUCCESS != mx_nic_id_to_hostname( mx_endpoint->mx_peer->nic_id, peer_name)) { + sprintf( peer_name, "unknown %lx nic_id", (long)mx_endpoint->mx_peer->nic_id ); + } + opal_output(0, "Error in mx_issend (error %s) sending to %s\n", mx_strerror(mx_return), peer_name); + } + } else { +#if 0 + printf("isend bits: 0x%016llx\n", match_bits); +#endif + mx_return = mx_isend( ompi_mtl_mx.mx_endpoint, + 
mtl_mx_request->mx_segment, + 1, + mx_endpoint->mx_peer_addr, + match_bits, + mtl_mx_request, + &mtl_mx_request->mx_request + ); + + if(mx_return != MX_SUCCESS ) { + char peer_name[MX_MAX_HOSTNAME_LEN]; + if(MX_SUCCESS != mx_nic_id_to_hostname( mx_endpoint->mx_peer->nic_id, peer_name)) { + sprintf( peer_name, "unknown %lx nic_id", (long)mx_endpoint->mx_peer->nic_id ); + } + opal_output(0, "Error in mx_isend (error %s) sending to %s\n", mx_strerror(mx_return), peer_name); + } + + } + + return mx_return == MX_SUCCESS ? OMPI_SUCCESS : OMPI_ERROR; +} diff --git a/ompi/mca/mtl/portals/.ompi_ignore b/ompi/mca/mtl/portals/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ompi/mca/mtl/portals/.ompi_unignore b/ompi/mca/mtl/portals/.ompi_unignore new file mode 100644 index 0000000000..c1c16e5ee2 --- /dev/null +++ b/ompi/mca/mtl/portals/.ompi_unignore @@ -0,0 +1,4 @@ +brbarret +bbarrett +gshipman +rbbrigh diff --git a/ompi/mca/mtl/portals/Makefile.am b/ompi/mca/mtl/portals/Makefile.am new file mode 100644 index 0000000000..bd0eaf2baf --- /dev/null +++ b/ompi/mca/mtl/portals/Makefile.am @@ -0,0 +1,58 @@ +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +AM_CPPFLAGS = $(mtl_portals_CPPFLAGS) + +if OMPI_BUILD_mtl_portals_DSO +component_noinst = +component_install = mca_mtl_portals.la +else +component_noinst = libmca_mtl_portals.la +component_install = +endif + +local_sources = \ + mtl_portals_component.c \ + mtl_portals.c \ + mtl_portals.h \ + mtl_portals_endpoint.h \ + mtl_portals_recv.c \ + mtl_portals_request.h \ + mtl_portals_send.c \ + mtl_portals_probe.c + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_mtl_portals_la_SOURCES = $(local_sources) +mca_mtl_portals_la_LIBADD = \ + $(mtl_portals_LIBS) \ + $(top_ompi_builddir)/ompi/mca/common/portals/libmca_common_portals.la \ + $(top_ompi_builddir)/ompi/libmpi.la \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la +mca_mtl_portals_la_LDFLAGS = -module -avoid-version $(mtl_portals_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_mtl_portals_la_SOURCES = $(local_sources) +libmca_mtl_portals_la_LIBADD = $(mtl_portals_LIBS) +libmca_mtl_portals_la_LDFLAGS = -module -avoid-version $(mtl_portals_LDFLAGS) + diff --git a/ompi/mca/mtl/portals/configure.m4 b/ompi/mca/mtl/portals/configure.m4 new file mode 100644 index 0000000000..0671d30157 --- /dev/null +++ b/ompi/mca/mtl/portals/configure.m4 @@ -0,0 +1,58 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# MCA_mtl_portals_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_mtl_portals_CONFIG],[ + OMPI_CHECK_PORTALS([mtl_portals], + [mtl_portals_happy="yes"], + [mtl_portals_happy="no"]) + + AS_IF([test "$mtl_portals_happy" = "yes"], + [mtl_portals_WRAPPER_EXTRA_LDFLAGS="$mtl_portals_LDFLAGS" + mtl_portals_WRAPPER_EXTRA_LIBS="$mtl_portals_LIBS" + $1], + [$2]) + + # substitute in the things needed to build portals + AC_SUBST([mtl_portals_CPPFLAGS]) + AC_SUBST([mtl_portals_LDFLAGS]) + AC_SUBST([mtl_portals_LIBS]) +])dnl diff --git a/ompi/mca/mtl/portals/configure.params b/ompi/mca/mtl/portals/configure.params new file mode 100644 index 0000000000..0369ba5a6f --- /dev/null +++ b/ompi/mca/mtl/portals/configure.params @@ -0,0 +1,21 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. 
+# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module +PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/mtl/portals/mtl_portals.c b/ompi/mca/mtl/portals/mtl_portals.c new file mode 100644 index 0000000000..d8e6da614d --- /dev/null +++ b/ompi/mca/mtl/portals/mtl_portals.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mca/mtl/mtl.h" +#include "ompi/communicator/communicator.h" +#include "opal/class/opal_list.h" + +#include "mtl_portals.h" +#include "mtl_portals_endpoint.h" +#include "mtl_portals_request.h" + + +mca_mtl_portals_module_t ompi_mtl_portals = { + { + 8191, /* max cid - 2^13 - 1 */ + (1UL << 30), /* max tag value - must allow negatives */ + 0, /* request reserve space */ + 0, /* flags */ + + ompi_mtl_portals_add_procs, + ompi_mtl_portals_del_procs, + ompi_mtl_portals_finalize, + + NULL, + ompi_mtl_portals_isend, + ompi_mtl_portals_irecv, + ompi_mtl_portals_iprobe, + + NULL /* cancel */ + } +}; + + +/* BWB - fix me - this should be an ompi_free_list_item_t */ +OBJ_CLASS_INSTANCE(ompi_mtl_portals_event_t, opal_list_item_t, + NULL, NULL); + + +static int ompi_mtl_portals_progress(void); + + +int +ompi_mtl_portals_add_procs(struct mca_mtl_base_module_t *mtl, + size_t nprocs, + struct ompi_proc_t** procs, + struct mca_mtl_base_endpoint_t **mtl_peer_data) +{ + int ret = OMPI_SUCCESS; + 
ptl_process_id_t *portals_procs = NULL; + size_t i; + + assert(mtl == &ompi_mtl_portals.base); + + /* if we havne't already initialized the network, do so now. We + delay until add_procs because if we want the automatic runtime + environment setup the common code does for the utcp + implementation, we can't do it until modex information can be + received. */ + if (PTL_INVALID_HANDLE == ompi_mtl_portals.ptl_ni_h) { + ptl_md_t md; + ptl_handle_md_t md_h; + ptl_process_id_t anyproc; + uint64_t match_bits = 0; + + ret = ompi_common_portals_ni_initialize(&(ompi_mtl_portals.ptl_ni_h)); + if (OMPI_SUCCESS != ret) goto cleanup; + + /* initialize the event queues */ + ret = PtlEQAlloc(ompi_mtl_portals.ptl_ni_h, + 1024, /* BWB - fix me */ + PTL_EQ_HANDLER_NONE, + &(ompi_mtl_portals.ptl_eq_h)); + assert(ret == PTL_OK); + + ret = PtlEQAlloc(ompi_mtl_portals.ptl_ni_h, + 1024, /* BWB - fix me */ + PTL_EQ_HANDLER_NONE, + &(ompi_mtl_portals.ptl_unexpected_recv_eq_h)); + assert(ret == PTL_OK); + + /* create unexpected message match entry */ + anyproc.nid = PTL_NID_ANY; + anyproc.pid = PTL_PID_ANY; + + /* unexpected message match entry should receive from anyone, + so ignore bits are all 1 */ + ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h, + OMPI_MTL_PORTALS_SEND_TABLE_ID, + anyproc, + match_bits, + ~match_bits, + PTL_RETAIN, + PTL_INS_AFTER, + &(ompi_mtl_portals.ptl_unexpected_me_h)); + assert(ret == PTL_OK); + + md.start = NULL; + md.length = 0; + md.threshold = PTL_MD_THRESH_INF; + md.max_size = 0; + md.options = (PTL_MD_OP_PUT | PTL_MD_TRUNCATE | PTL_MD_ACK_DISABLE | PTL_MD_EVENT_START_DISABLE); + md.eq_handle = ompi_mtl_portals.ptl_unexpected_recv_eq_h; + + ret = PtlMDAttach(ompi_mtl_portals.ptl_unexpected_me_h, + md, + PTL_RETAIN, + &md_h); + assert(ret == PTL_OK); + + opal_progress_register(ompi_mtl_portals_progress); + } + + /* get the list of ptl_process_id_t structures for the given proc + structures. 
If the Portals runtime environment supports + comm_spawn, we'll be able to support it as well. */ + portals_procs = malloc(sizeof(ptl_process_id_t) * nprocs); + if (NULL == portals_procs) goto cleanup; + ret = ompi_common_portals_get_procs(nprocs, procs, portals_procs); + if (OMPI_SUCCESS != ret) goto cleanup; + + /* copy the ptl_process_id_t information into our per-proc data + store */ + for (i = 0 ; i < nprocs ; ++i) { + mtl_peer_data[i] = malloc(sizeof(struct mca_mtl_base_endpoint_t)); + if (NULL == mtl_peer_data[i]) goto cleanup; + + mtl_peer_data[i]->ptl_proc.nid = portals_procs[i].nid; + mtl_peer_data[i]->ptl_proc.pid = portals_procs[i].pid; + } + + cleanup: + if (NULL != portals_procs) free(portals_procs); + return ret; +} + + +int +ompi_mtl_portals_del_procs(struct mca_mtl_base_module_t *mtl, + size_t nprocs, + struct ompi_proc_t** procs, + struct mca_mtl_base_endpoint_t **mtl_peer_data) +{ + size_t i; + + assert(mtl == &ompi_mtl_portals.base); + + for (i = 0 ; i < nprocs ; ++i) { + if (NULL != mtl_peer_data[i]) { + free(mtl_peer_data[i]); + } + } + + return OMPI_SUCCESS; +} + + +int +ompi_mtl_portals_finalize(struct mca_mtl_base_module_t *mtl) +{ + assert(mtl == &ompi_mtl_portals.base); + + opal_progress_unregister(ompi_mtl_portals_progress); + ompi_common_portals_ni_finalize(); + ompi_common_portals_finalize(); + + return OMPI_SUCCESS; +} + + +static int +ompi_mtl_portals_progress(void) +{ + int count = 0, ret; + ptl_event_t ev; + ompi_mtl_portals_request_t *ptl_request; + + while (true) { + ret = PtlEQGet(ompi_mtl_portals.ptl_eq_h, &ev); + if (PTL_OK == ret) { + if (ev.type == PTL_EVENT_UNLINK) continue; + + ptl_request = ev.md.user_ptr; + ret = ptl_request->event_callback(&ev, ptl_request); + if (OMPI_SUCCESS != ret) { + abort(); + } + } else if (PTL_EQ_EMPTY == ret) { + break; + } else { + abort(); + } + } + + return count; +} diff --git a/ompi/mca/mtl/portals/mtl_portals.h b/ompi/mca/mtl/portals/mtl_portals.h new file mode 100644 index 
0000000000..de41f63ab7 --- /dev/null +++ b/ompi/mca/mtl/portals/mtl_portals.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MTL_PORTALS_H_HAS_BEEN_INCLUDED +#define MTL_PORTALS_H_HAS_BEEN_INCLUDED + +#include "opal/threads/threads.h" +#include "opal/threads/condition.h" +#include "opal/class/opal_list.h" +#include "ompi/class/ompi_free_list.h" +#include "opal/util/cmd_line.h" +#include "ompi/request/request.h" +#include "ompi/mca/mtl/mtl.h" +#include "ompi/mca/mtl/base/base.h" +#include "ompi/datatype/datatype.h" +#include "ompi/datatype/convertor.h" + +#include "ompi/mca/common/portals/common_portals.h" + + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +#define OMPI_MTL_PORTALS_DEBUG 0 + +struct mca_mtl_portals_module_t { + mca_mtl_base_module_t base; + + ptl_handle_ni_t ptl_ni_h; + size_t eager_limit; + + ptl_handle_eq_t ptl_eq_h; + ptl_handle_eq_t ptl_unexpected_recv_eq_h; + + ptl_handle_me_t ptl_unexpected_me_h; + + ompi_free_list_t event_fl; + + opal_list_t unexpected_messages; +}; +typedef struct mca_mtl_portals_module_t mca_mtl_portals_module_t; + +extern mca_mtl_portals_module_t ompi_mtl_portals; + + +struct ompi_mtl_portals_event_t { + struct ompi_free_list_item_t super; + ptl_event_t ev; +}; +typedef struct ompi_mtl_portals_event_t ompi_mtl_portals_event_t; +OBJ_CLASS_DECLARATION(ompi_mtl_portals_event_t); + + +/* match/ignore bit manipulation + * + * 0123 4567 
01234567 01234567 01234567 01234567 01234567 01234567 01234567 + * | | | + * ^ | context id | source | message tag + * | | | | + * +---- protocol + */ + +#define PTL_PROTOCOL_MASK 0xF000000000000000ULL +#define PTL_CONTEXT_MASK 0x0FFF000000000000ULL +#define PTL_SOURCE_MASK 0x0000FFFF00000000ULL +#define PTL_TAG_MASK 0x00000000FFFFFFFFULL + +#define PTL_PROTOCOL_IGNR PTL_PROTOCOL_MASK +#define PTL_CONTEXT_IGNR PTL_CONTEXT_MASK +#define PTL_SOURCE_IGNR PTL_SOURCE_MASK +#define PTL_TAG_IGNR 0x00000000EFFFFFFFULL + +#define PTL_SHORT_MSG 0x1000000000000000ULL +#define PTL_LONG_MSG 0x2000000000000000ULL +#define PTL_READY_MSG 0x3000000000000000ULL + +/* send posting */ +#define PTL_SET_SEND_BITS(match_bits, contextid, source, tag, type) \ + { \ + match_bits = contextid; \ + match_bits = (match_bits << 16); \ + match_bits |= source; \ + match_bits = (match_bits << 32); \ + match_bits |= (PTL_TAG_MASK & tag) | type; \ +} + +/* receive posting */ +#define PTL_SET_RECV_BITS(match_bits, ignore_bits, contextid, source, tag) \ +{ \ + match_bits = 0; \ + ignore_bits = PTL_PROTOCOL_IGNR; \ + \ + match_bits = contextid; \ + match_bits = (match_bits << 16); \ + \ + if (MPI_ANY_SOURCE == source) { \ + match_bits = (match_bits << 32); \ + ignore_bits |= PTL_SOURCE_IGNR; \ + } else { \ + match_bits |= source; \ + match_bits = (match_bits << 32); \ + } \ + \ + if (MPI_ANY_TAG == tag) { \ + ignore_bits |= PTL_TAG_IGNR; \ + } else { \ + match_bits |= (PTL_TAG_MASK & tag); \ + } \ +} + +#define PTL_IS_SHORT_MSG(match_bits, ret) \ +{ \ + ret = (0 != (PTL_SHORT_MSG & match_bits)); \ +} + +#define PTL_GET_TAG(match_bits) ((int)(match_bits & PTL_TAG_MASK)) +#define PTL_GET_SOURCE(match_bits) ((int)((match_bits & PTL_SOURCE_MASK) >> 32)) + +/* MTL interface functions */ +extern int ompi_mtl_portals_finalize(struct mca_mtl_base_module_t *mtl); + +extern int ompi_mtl_portals_add_procs(struct mca_mtl_base_module_t* mtl, + size_t nprocs, + struct ompi_proc_t** procs, + struct 
mca_mtl_base_endpoint_t **mtl_peer_data); + +extern int ompi_mtl_portals_del_procs(struct mca_mtl_base_module_t* mtl, + size_t nprocs, + struct ompi_proc_t** procs, + struct mca_mtl_base_endpoint_t **mtl_peer_data); + +extern int ompi_mtl_portals_send(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct ompi_convertor_t *convertor, + mca_pml_base_send_mode_t mode); + +extern int ompi_mtl_portals_isend(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct ompi_convertor_t *convertor, + mca_pml_base_send_mode_t mode, + bool blocking, + mca_mtl_request_t *mtl_request); + +extern int ompi_mtl_portals_irecv(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + struct ompi_convertor_t *convertor, + mca_mtl_request_t *mtl_request); + +extern int ompi_mtl_portals_iprobe(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + int *flag, + struct ompi_status_public_t *status); + +extern int ompi_mtl_portals_cancel(struct mca_mtl_base_module_t* mtl, + mca_mtl_request_t *mtl_request, + int flag); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif /* MTL_PORTALS_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/mtl/portals/mtl_portals_component.c b/ompi/mca/mtl/portals/mtl_portals_component.c new file mode 100644 index 0000000000..df7bf823e5 --- /dev/null +++ b/ompi/mca/mtl/portals/mtl_portals_component.c @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/event/event.h" +#include "opal/mca/base/mca_base_param.h" +#include "ompi/datatype/convertor.h" +#include "ompi/mca/common/portals/common_portals.h" + +#include "mtl_portals.h" +#include "mtl_portals_request.h" + + +static int ompi_mtl_portals_component_open(void); +static int ompi_mtl_portals_component_close(void); +static mca_mtl_base_module_t* ompi_mtl_portals_component_init( + bool enable_progress_threads, bool enable_mpi_threads); + +mca_mtl_base_component_1_0_0_t mca_mtl_portals_component = { + + /* First, the mca_base_component_t struct containing meta + * information about the component itself */ + + { + /* Indicate that we are a mtl v1.0.0 component (which also implies + * a specific MCA version) */ + + MCA_MTL_BASE_VERSION_1_0_0, + + "portals", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + ompi_mtl_portals_component_open, /* component open */ + ompi_mtl_portals_component_close /* component close */ + }, + + /* Next the MCA v1.0.0 component meta data */ + + { + /* Whether the component is checkpointable or not */ + false + }, + + ompi_mtl_portals_component_init, /* component init */ +}; + +static int +ompi_mtl_portals_component_open(void) +{ + int tmp; + + ompi_common_portals_register_mca(); + + ompi_mtl_portals.base.mtl_request_size = + sizeof(ompi_mtl_portals_request_t) - + sizeof(struct mca_mtl_request_t); + + mca_base_param_reg_int(&mca_mtl_portals_component.mtl_version, + "eager_limit", + "Cross-over point from eager to rendezvous sends", + false, + false, + 0, + &tmp); + + ompi_mtl_portals.eager_limit = tmp; + + ompi_mtl_portals.ptl_ni_h = PTL_INVALID_HANDLE; + + return 
OMPI_SUCCESS; +} + + +static int +ompi_mtl_portals_component_close(void) +{ + return OMPI_SUCCESS; +} + + +static mca_mtl_base_module_t* +ompi_mtl_portals_component_init(bool enable_progress_threads, + bool enable_mpi_threads) +{ + /* we don't run with no stinkin' threads */ + if (enable_progress_threads || enable_mpi_threads) return NULL; + + /* initialize our interface */ + if (OMPI_SUCCESS != ompi_common_portals_initialize()) { + return NULL; + } + + OBJ_CONSTRUCT(&ompi_mtl_portals.event_fl, ompi_free_list_t); + ompi_free_list_init(&ompi_mtl_portals.event_fl, + sizeof(ompi_mtl_portals_event_t), + OBJ_CLASS(ompi_mtl_portals_event_t), + 1, -1, 1, NULL); + OBJ_CONSTRUCT(&ompi_mtl_portals.unexpected_messages, opal_list_t); + + return &ompi_mtl_portals.base; +} diff --git a/ompi/mca/mtl/portals/mtl_portals_endpoint.h b/ompi/mca/mtl/portals/mtl_portals_endpoint.h new file mode 100644 index 0000000000..f6debfbd89 --- /dev/null +++ b/ompi/mca/mtl/portals/mtl_portals_endpoint.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_MTL_PORTALS_ENDPOINT_H +#define OMPI_MTL_PORTALS_ENDPOINT_H + +struct mca_mtl_base_endpoint_t { + ptl_process_id_t ptl_proc; +}; +typedef struct mca_mtl_base_endpoint_t mca_mtl_base_endpoint_t; + +#endif diff --git a/ompi/mca/mtl/portals/mtl_portals_probe.c b/ompi/mca/mtl/portals/mtl_portals_probe.c new file mode 100644 index 0000000000..d5b20ea877 --- /dev/null +++ b/ompi/mca/mtl/portals/mtl_portals_probe.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/request/request.h" +#include "mtl_portals.h" + + +int +ompi_mtl_portals_iprobe(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + int *flag, + struct ompi_status_public_t *status) +{ + return OMPI_ERR_NOT_IMPLEMENTED; +} diff --git a/ompi/mca/mtl/portals/mtl_portals_recv.c b/ompi/mca/mtl/portals/mtl_portals_recv.c new file mode 100644 index 0000000000..5034078316 --- /dev/null +++ b/ompi/mca/mtl/portals/mtl_portals_recv.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" + +#include "opal/class/opal_list.h" +#include "ompi/request/request.h" +#include "ompi/datatype/datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/convertor.h" +#include "ompi/mca/mtl/base/base.h" +#include "ompi/mca/pml/base/pml_base_recvreq.h" + +#include "mtl_portals.h" +#include "mtl_portals_endpoint.h" +#include "mtl_portals_request.h" + +/* called when a receive should be progressed */ +static int +ompi_mtl_portals_recv_progress(ptl_event_t *ev, + struct ompi_mtl_portals_request_t* ptl_request) +{ + switch (ev->type) { + case PTL_EVENT_PUT_END: + case PTL_EVENT_REPLY_END: + { + mca_pml_base_recv_request_t *recvreq = + (mca_pml_base_recv_request_t*) ptl_request->super.ompi_req; + + /* make sure the data is in the right place */ + ompi_mtl_datatype_unpack(&recvreq->req_convertor, + ev->md.start, ev->md.length); + + /* set the status */ + recvreq->req_base.req_ompi.req_status.MPI_SOURCE = + PTL_GET_SOURCE(ev->match_bits); + recvreq->req_base.req_ompi.req_status.MPI_TAG = + PTL_GET_TAG(ev->match_bits); + /* BWB - fix me - this is right for put but not for + unexpected, I think */ + recvreq->req_base.req_ompi.req_status.MPI_ERROR = + (ev->rlength > ev->mlength) ? 
+ MPI_ERR_TRUNCATE : MPI_SUCCESS; + recvreq->req_base.req_ompi.req_status._count = + ev->mlength; + + ptl_request->super.completion_callback(&ptl_request->super); + } + break; + + default: + break; + } + + return OMPI_SUCCESS; +} + + +static int +ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event, + struct ompi_convertor_t *convertor, + ompi_mtl_portals_request_t *ptl_request) +{ + int ret; + ptl_md_t md; + ptl_handle_md_t md_h; + + PTL_IS_SHORT_MSG(recv_event->ev.match_bits, ret); + if (ret) { + /* the buffer is sitting in the short message queue */ + abort(); + } else { + size_t buflen; + ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen, + &ptl_request->free_after); + if (OMPI_SUCCESS != ret) abort(); + md.length = buflen; + md.threshold = 2; /* send and get */ + md.options = PTL_MD_EVENT_START_DISABLE; + md.user_ptr = ptl_request; + md.eq_handle = ompi_mtl_portals.ptl_eq_h; + + ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, + PTL_UNLINK, &md_h); + if (PTL_OK != ret) abort(); + + ret = PtlGet(md_h, + recv_event->ev.initiator, + OMPI_MTL_PORTALS_READ_TABLE_ID, + 0, + recv_event->ev.hdr_data, + 0); + if (PTL_OK != ret) abort(); + + ptl_request->event_callback = ompi_mtl_portals_recv_progress; + } + + return OMPI_SUCCESS; +} + +int +ompi_mtl_portals_irecv(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t *comm, + int src, + int tag, + struct ompi_convertor_t *convertor, + mca_mtl_request_t *mtl_request) +{ + ptl_match_bits_t match_bits, ignore_bits; + ptl_md_t md; + ptl_handle_md_t md_h; + ptl_handle_me_t me_h; + int ret; + ptl_process_id_t remote_proc; + mca_mtl_base_endpoint_t *endpoint = NULL; + opal_list_item_t *list_item; + ompi_mtl_portals_request_t *ptl_request = + (ompi_mtl_portals_request_t*) mtl_request; + size_t buflen; + + if (MPI_ANY_SOURCE == src) { + remote_proc.nid = PTL_NID_ANY; + remote_proc.pid = PTL_PID_ANY; + } else { + endpoint = (mca_mtl_base_endpoint_t*) comm->c_pml_procs[src]->proc_ompi->proc_pml; + 
remote_proc = endpoint->ptl_proc; + } + + PTL_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, + src, tag); +#if OMPI_MTL_PORTALS_DEBUG + printf("recv bits: 0x%016llx 0x%016llx\n", match_bits, ignore_bits); +#endif + + /* first, check the queue of processed unexpected messages */ + list_item = opal_list_get_first(&ompi_mtl_portals.unexpected_messages); + while (list_item != opal_list_get_end(&ompi_mtl_portals.unexpected_messages)) { + opal_list_item_t *next_item = opal_list_get_next(list_item); + ompi_mtl_portals_event_t *recv_event = + (ompi_mtl_portals_event_t*) list_item; + + if ((recv_event->ev.match_bits & ~ignore_bits) == + (match_bits & ~ignore_bits)) { + /* we have a match... */ + opal_list_remove_item(&(ompi_mtl_portals.unexpected_messages), + list_item); + ompi_mtl_portals_get_data(recv_event, convertor, ptl_request); + OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl, list_item); + goto cleanup; + } + list_item = next_item; + } + + /* now check the unexpected queue */ + restart_search: + while (true) { + ompi_mtl_portals_event_t *recv_event; + OMPI_FREE_LIST_GET(&ompi_mtl_portals.event_fl, recv_event, ret); + ret = PtlEQGet(ompi_mtl_portals.ptl_unexpected_recv_eq_h, + &recv_event->ev); + if (PTL_OK == ret) { + if ((recv_event->ev.match_bits & ~ignore_bits) == + (match_bits & ~ignore_bits)) { + /* we have a match... 
*/ + ompi_mtl_portals_get_data(recv_event, convertor, ptl_request); + goto cleanup; + } else { + /* not ours - put in unexpected queue */ + opal_list_append(&(ompi_mtl_portals.unexpected_messages), + (opal_list_item_t*) recv_event); + } + } else if (PTL_EQ_EMPTY == ret) { + break; + } else { + abort(); + } + } + + /* now post the receive */ + ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen, + &ptl_request->free_after); + md.length = buflen; + + PtlMEInsert(ompi_mtl_portals.ptl_unexpected_me_h, + remote_proc, + match_bits, + ignore_bits, + PTL_UNLINK, + PTL_INS_BEFORE, + &me_h); + + md.threshold = 0; + md.options = PTL_MD_OP_PUT | PTL_MD_TRUNCATE | PTL_MD_EVENT_START_DISABLE; + md.user_ptr = ptl_request; + md.eq_handle = ompi_mtl_portals.ptl_eq_h; + PtlMDAttach(me_h, md, PTL_UNLINK, &md_h); + + /* now try to make active */ + md.threshold = 1; + + ret = PtlMDUpdate(md_h, NULL, &md, + ompi_mtl_portals.ptl_unexpected_recv_eq_h); + if (ret == PTL_MD_NO_UPDATE) { + /* a message has arrived since we searched - look again */ + PtlMDUnlink(md_h); + if (ptl_request->free_after) { free(md.start); } + goto restart_search; + } + + ptl_request->event_callback = ompi_mtl_portals_recv_progress; + + cleanup: + + return OMPI_SUCCESS; +} + diff --git a/ompi/mca/mtl/portals/mtl_portals_request.h b/ompi/mca/mtl/portals/mtl_portals_request.h new file mode 100644 index 0000000000..a169e990bf --- /dev/null +++ b/ompi/mca/mtl/portals/mtl_portals_request.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. 
+ * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OMPI_MTL_PORTALS_REQUEST_H +#define OMPI_MTL_PORTALS_REQUEST_H + +struct ompi_mtl_portals_request_t { + struct mca_mtl_request_t super; + ptl_handle_eq_t eq_h; + bool free_after; + + int (*event_callback)(ptl_event_t *ev, struct ompi_mtl_portals_request_t*); +}; +typedef struct ompi_mtl_portals_request_t ompi_mtl_portals_request_t; + + +#endif diff --git a/ompi/mca/mtl/portals/mtl_portals_send.c b/ompi/mca/mtl/portals/mtl_portals_send.c new file mode 100644 index 0000000000..af974bf5d0 --- /dev/null +++ b/ompi/mca/mtl/portals/mtl_portals_send.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/datatype/datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/convertor.h" +#include "ompi/mca/mtl/base/base.h" +#include "ompi/mca/pml/base/pml_base_sendreq.h" + +#include "mtl_portals.h" +#include "mtl_portals_request.h" +#include "mtl_portals_endpoint.h" + +/* called when a send should be progressed */ +static int +ompi_mtl_portals_send_progress(ptl_event_t *ev, + struct ompi_mtl_portals_request_t* ptl_request) +{ + switch (ev->type) { + case PTL_EVENT_ACK: + /* message received - if they receivd the entire message, + we're done. 
If not, wait for the get */ + if (ev->md.length == ev->mlength) { + if (ptl_request->free_after) { + free(ev->md.start); + } + PtlMDUnlink(ev->md_handle); + ptl_request->super.completion_callback(&ptl_request->super); + } + break; + + case PTL_EVENT_GET_END: + { + /* the get finished, so we're done. */ + if (ptl_request->free_after) { + free(ev->md.start); + } + PtlMDUnlink(ev->md_handle); + ptl_request->super.completion_callback(&ptl_request->super); + } + break; + + default: + break; + } + + return OMPI_SUCCESS; +} + + +int +ompi_mtl_portals_send(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct ompi_convertor_t *convertor, + mca_pml_base_send_mode_t mode) +{ + return OMPI_ERR_NOT_IMPLEMENTED; +} + + +int +ompi_mtl_portals_isend(struct mca_mtl_base_module_t* mtl, + struct ompi_communicator_t* comm, + int dest, + int tag, + struct ompi_convertor_t *convertor, + mca_pml_base_send_mode_t mode, + bool blocking, + mca_mtl_request_t *mtl_request) +{ + int ret; + ptl_match_bits_t match_bits; + ptl_md_t md; + ptl_handle_md_t md_h; + ptl_handle_me_t me_h; + mca_mtl_base_endpoint_t *endpoint = + (mca_mtl_base_endpoint_t*) comm->c_pml_procs[dest]->proc_ompi->proc_pml; + ompi_mtl_portals_request_t *ptl_request = + (ompi_mtl_portals_request_t*) mtl_request; + size_t buflen; + ompi_ptr_t ptr; + + assert(mtl == &ompi_mtl_portals.base); + + ret = ompi_mtl_datatype_pack(convertor, &md.start, &buflen, + &(ptl_request->free_after)); + if (OMPI_SUCCESS != ret) return ret; + md.length = buflen; + + ptl_request->event_callback = ompi_mtl_portals_send_progress; + + if (MCA_PML_BASE_SEND_READY == mode) { + /* ready send - same protocol regardless of length */ + PTL_SET_SEND_BITS(match_bits, comm->c_contextid, + comm->c_my_rank, + tag, PTL_READY_MSG); +#if OMPI_MTL_PORTALS_DEBUG + printf("ready send bits: 0x%016llx\n", match_bits); +#endif + + md.threshold = 1; + md.options = PTL_MD_EVENT_START_DISABLE; + md.user_ptr = ptl_request; + 
md.eq_handle = ompi_mtl_portals.ptl_eq_h; + + ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, + md, + PTL_RETAIN, + &(md_h)); + if (OMPI_SUCCESS != ret) { + if (ptl_request->free_after) free(md.start); + return ompi_common_portals_error_ptl_to_ompi(ret); + } + + ret = PtlPut(md_h, + PTL_NO_ACK_REQ, + endpoint->ptl_proc, + OMPI_MTL_PORTALS_SEND_TABLE_ID, + 0, + match_bits, + 0, + 0); + if (OMPI_SUCCESS != ret) { + PtlMDUnlink(md_h); + if (ptl_request->free_after) free(md.start); + return ompi_common_portals_error_ptl_to_ompi(ret); + } + + } else if (md.length > ompi_mtl_portals.eager_limit) { + /* it's a long message - same protocol for all send modes */ + PTL_SET_SEND_BITS(match_bits, comm->c_contextid, + comm->c_my_rank, + tag, PTL_LONG_MSG); +#if OMPI_MTL_PORTALS_DEBUG + printf("long send bits: 0x%016llx\n", match_bits); +#endif + + md.threshold = 3; /* send, ack, get */ + md.options = PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE; + md.user_ptr = ptl_request; + md.eq_handle = ompi_mtl_portals.ptl_eq_h; + + /* gets are all by unique key, so we don't really care + where in the list it goes */ + ptr.pval = ptl_request; + ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h, + OMPI_MTL_PORTALS_READ_TABLE_ID, + endpoint->ptl_proc, + (ptl_match_bits_t) ptr.lval, + 0, + PTL_UNLINK, + PTL_INS_AFTER, + &me_h); + if (OMPI_SUCCESS != ret) { + if (ptl_request->free_after) free(md.start); + return ompi_common_portals_error_ptl_to_ompi(ret); + } + + ret = PtlMDAttach(me_h, + md, + PTL_RETAIN, + &(md_h)); + + if (OMPI_SUCCESS != ret) { + PtlMEUnlink(me_h); + if (ptl_request->free_after) free(md.start); + return ompi_common_portals_error_ptl_to_ompi(ret); + } + + ret = PtlPut(md_h, + PTL_ACK_REQ, + endpoint->ptl_proc, + OMPI_MTL_PORTALS_SEND_TABLE_ID, + 0, + match_bits, + 0, + (ptl_hdr_data_t) ptr.lval); + if (OMPI_SUCCESS != ret) { + PtlMDUnlink(md_h); + if (ptl_request->free_after) free(md.start); + return ompi_common_portals_error_ptl_to_ompi(ret); + } + } else if 
(MCA_PML_BASE_SEND_SYNCHRONOUS) { + /* short synchronous message */ + PTL_SET_SEND_BITS(match_bits, comm->c_contextid, + comm->c_my_rank, + tag, PTL_SHORT_MSG); +#if OMPI_MTL_PORTALS_DEBUG + printf("short ssend bits: 0x%016llx\n", match_bits); +#endif + + /* BWB - fix me */ + return OMPI_ERR_NOT_IMPLEMENTED; + + } else { + /* short message for something not ack-worthy */ + PTL_SET_SEND_BITS(match_bits, comm->c_contextid, + comm->c_my_rank, + tag, PTL_SHORT_MSG); +#if OMPI_MTL_PORTALS_DEBUG + printf("short send bits: 0x%016llx\n", match_bits); +#endif + return OMPI_ERR_NOT_IMPLEMENTED; + + + md.threshold = 1; + md.options = PTL_MD_EVENT_START_DISABLE; + md.user_ptr = ptl_request; + md.eq_handle = ompi_mtl_portals.ptl_eq_h; + + ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, + md, + PTL_RETAIN, + &(md_h)); + if (OMPI_SUCCESS != ret) { + if (ptl_request->free_after) free(md.start); + return ompi_common_portals_error_ptl_to_ompi(ret); + } + + ret = PtlPut(md_h, + PTL_NO_ACK_REQ, + endpoint->ptl_proc, + OMPI_MTL_PORTALS_SEND_TABLE_ID, + 0, + match_bits, + 0, + 0); + if (OMPI_SUCCESS != ret) { + PtlMDUnlink(md_h); + if (ptl_request->free_after) free(md.start); + return ompi_common_portals_error_ptl_to_ompi(ret); + } + } + + return OMPI_SUCCESS; +} + diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c index 790aeec5b7..7c67fd3d9c 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_component.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_component.c @@ -204,7 +204,9 @@ ompi_osc_pt2pt_component_finalize(void) num_modules); } +#if 0 mca_bml.bml_register(MCA_BTL_TAG_OSC_PT2PT, NULL, NULL); +#endif OBJ_DESTRUCT(&mca_osc_pt2pt_component.p2p_c_longreqs); OBJ_DESTRUCT(&mca_osc_pt2pt_component.p2p_c_replyreqs); diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c index 2da4f3d9ea..afefb878ca 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c @@ -173,7 +173,7 @@ 
ompi_osc_pt2pt_sendreq_send(ompi_osc_pt2pt_module_t *module, } /* Get a BTL so we have the eager limit */ - endpoint = (mca_bml_base_endpoint_t*) sendreq->req_target_proc->proc_pml; + endpoint = (mca_bml_base_endpoint_t*) sendreq->req_target_proc->proc_bml; bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); descriptor = bml_btl->btl_alloc(bml_btl->btl, needed_len < bml_btl->btl_eager_limit ? needed_len : @@ -383,7 +383,7 @@ ompi_osc_pt2pt_replyreq_send(ompi_osc_pt2pt_module_t *module, size_t written_data = 0; /* Get a BTL and a fragment to go with it */ - endpoint = (mca_bml_base_endpoint_t*) replyreq->rep_origin_proc->proc_pml; + endpoint = (mca_bml_base_endpoint_t*) replyreq->rep_origin_proc->proc_bml; bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); descriptor = bml_btl->btl_alloc(bml_btl->btl, bml_btl->btl_eager_limit); @@ -781,7 +781,7 @@ ompi_osc_pt2pt_control_send(ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_control_header_t *header = NULL; /* Get a BTL and a fragment to go with it */ - endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml; + endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); descriptor = bml_btl->btl_alloc(bml_btl->btl, sizeof(ompi_osc_pt2pt_control_header_t)); diff --git a/ompi/mca/pml/base/pml_base_bsend.c b/ompi/mca/pml/base/pml_base_bsend.c index c1f4e5feef..a56b8985a1 100644 --- a/ompi/mca/pml/base/pml_base_bsend.c +++ b/ompi/mca/pml/base/pml_base_bsend.c @@ -223,8 +223,8 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request) /* setup request to reflect the contigous buffer */ sendreq->req_count = sendreq->req_bytes_packed; - sendreq->req_datatype = MPI_BYTE; +#if 0 /* In case we reuse an old request recreate the correct convertor, the one * using the user buffers. Otherwise at the end of this function we replace * it with a convertor using the allocator buffer !!! 
@@ -234,6 +234,11 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request) /* increment count of pending requests */ mca_pml_bsend_count++; +#endif + + sendreq->req_datatype = MPI_BYTE; + + OPAL_THREAD_UNLOCK(&mca_pml_bsend_mutex); /* The convertor is already initialized in the begining so we just have to @@ -243,8 +248,10 @@ int mca_pml_base_bsend_request_start(ompi_request_t* request) iov.iov_len = sendreq->req_count; iov_count = 1; max_data = iov.iov_len; - if((rc = ompi_convertor_pack( &sendreq->req_convertor, &iov, &iov_count, - &max_data, &freeAfter )) <= 0) { + if((rc = ompi_convertor_pack( &sendreq->req_convertor, + &iov, + &iov_count, + &max_data, &freeAfter )) < 0) { return OMPI_ERROR; } @@ -265,6 +272,8 @@ int mca_pml_base_bsend_request_alloc(ompi_request_t* request) { mca_pml_base_send_request_t* sendreq = (mca_pml_base_send_request_t*)request; + if (sendreq->req_count == 0) return OMPI_SUCCESS; + /* has a buffer been provided */ OPAL_THREAD_LOCK(&mca_pml_bsend_mutex); if(NULL == mca_pml_bsend_addr) { diff --git a/ompi/mca/pml/cm/Makefile.am b/ompi/mca/pml/cm/Makefile.am new file mode 100644 index 0000000000..3795293aab --- /dev/null +++ b/ompi/mca/pml/cm/Makefile.am @@ -0,0 +1,54 @@ +# +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if OMPI_BUILD_pml_cm_DSO +component_noinst = +component_install = mca_pml_cm.la +else +component_noinst = libmca_pml_cm.la +component_install = +endif + +local_sources = \ + pml_cm.c \ + pml_cm.h \ + pml_cm_cancel.c \ + pml_cm_component.c \ + pml_cm_probe.c \ + pml_cm_proc.h \ + pml_cm_proc.c \ + pml_cm_recv.c \ + pml_cm_recvreq.h \ + pml_cm_recvreq.c \ + pml_cm_send.c \ + pml_cm_sendreq.h \ + pml_cm_sendreq.c \ + pml_cm_progress.c \ + pml_cm_start.c + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_pml_cm_la_SOURCES = $(local_sources) +mca_pml_cm_la_LIBADD = \ + $(pml_cm_LIBS) \ + $(top_ompi_builddir)/ompi/libmpi.la \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la +mca_pml_cm_la_LDFLAGS = -module -avoid-version $(pml_cm_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pml_cm_la_SOURCES = $(local_sources) +libmca_pml_cm_la_LIBADD = $(pml_cm_LIBS) +libmca_pml_cm_la_LDFLAGS = -module -avoid-version $(pml_cm_LDFLAGS) + diff --git a/ompi/mca/pml/cm/configure.params b/ompi/mca/pml/cm/configure.params new file mode 100644 index 0000000000..cbd49d24d3 --- /dev/null +++ b/ompi/mca/pml/cm/configure.params @@ -0,0 +1,14 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2006 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/pml/cm/pml_cm.c b/ompi/mca/pml/cm/pml_cm.c new file mode 100644 index 0000000000..e5696613fb --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/communicator/communicator.h" +#include "opal/class/opal_list.h" +#include "ompi/mca/pml/base/pml_base_request.h" +#include "ompi/mca/pml/base/pml_base_bsend.h" + +#include "pml_cm.h" +#include "pml_cm_proc.h" +#include "pml_cm_sendreq.h" +#include "pml_cm_recvreq.h" + +ompi_pml_cm_t ompi_pml_cm = { + { + mca_pml_cm_add_procs, + mca_pml_cm_del_procs, + mca_pml_cm_enable, + mca_pml_cm_progress, + mca_pml_cm_add_comm, + mca_pml_cm_del_comm, + mca_pml_cm_irecv_init, + mca_pml_cm_irecv, + mca_pml_cm_recv, + mca_pml_cm_isend_init, + mca_pml_cm_isend, + mca_pml_cm_send, + mca_pml_cm_iprobe, + mca_pml_cm_probe, + mca_pml_cm_start, + mca_pml_cm_dump, + 0, + 0 + } +}; + + +int +mca_pml_cm_enable(bool enable) +{ + /* BWB - FIX ME - need to have this actually do something, + maybe? */ + return OMPI_SUCCESS; +} + + +int +mca_pml_cm_add_comm(ompi_communicator_t* comm) +{ + mca_pml_cm_proc_t *pml_proc; + int i; + + /* setup our per-communicator data */ + comm->c_pml_comm = NULL; + + /* setup our proc cache on the communicator. 
This should be + something that can be safely cast to a mca_pml_proc_t* */ + comm->c_pml_procs = (mca_pml_proc_t**) malloc( + comm->c_remote_group->grp_proc_count * sizeof(mca_pml_proc_t*)); + if(NULL == comm->c_pml_procs) { + return OMPI_ERR_OUT_OF_RESOURCE; + } + + for(i = 0 ; i < comm->c_remote_group->grp_proc_count ; i++){ + pml_proc = OBJ_NEW(mca_pml_cm_proc_t); + pml_proc->base.proc_ompi = comm->c_remote_group->grp_proc_pointers[i]; + comm->c_pml_procs[i] = (mca_pml_proc_t*) pml_proc; + } + + return OMPI_SUCCESS; +} + + +int +mca_pml_cm_del_comm(ompi_communicator_t* comm) +{ + int i; + + /* clean up our per-communicator data */ + comm->c_pml_comm = NULL; + + /* clean up our proc cache on the communicator */ + if (comm->c_pml_procs != NULL) { + for(i = 0 ; i < comm->c_remote_group->grp_proc_count ; i++){ + mca_pml_cm_proc_t *pml_proc = + (mca_pml_cm_proc_t*) comm->c_pml_procs[i]; + OBJ_RELEASE(pml_proc); + } + free(comm->c_pml_procs); + comm->c_pml_procs = NULL; + } + + return OMPI_SUCCESS; +} + + +int +mca_pml_cm_add_procs(struct ompi_proc_t** procs, size_t nprocs) +{ + int ret; + size_t i; + struct mca_mtl_base_endpoint_t **endpoints; + + endpoints = malloc(nprocs * sizeof(struct mca_mtl_base_endpoint_t*)); + if (NULL == endpoints) return OMPI_ERROR; + +#if OMPI_ENABLE_DEBUG + for (i = 0 ; i < nprocs ; ++i) { + endpoints[i] = NULL; + } +#endif + + ret = OMPI_MTL_CALL(add_procs(ompi_mtl, nprocs, procs, endpoints)); + if (OMPI_SUCCESS != ret) { + free(endpoints); + return ret; + } + + for (i = 0 ; i < nprocs ; ++i) { + procs[i]->proc_pml = (struct mca_pml_base_endpoint_t*) endpoints[i]; + } + + free(endpoints); + return OMPI_SUCCESS; +} + + +int +mca_pml_cm_del_procs(struct ompi_proc_t** procs, size_t nprocs) +{ + int ret; + size_t i; + struct mca_mtl_base_endpoint_t **endpoints; + + endpoints = malloc(nprocs * sizeof(struct mca_mtl_base_endpoint_t*)); + if (NULL == endpoints) return OMPI_ERROR; + + for (i = 0 ; i < nprocs ; ++i) { + endpoints[i] = (struct 
mca_mtl_base_endpoint_t*) procs[i]->proc_pml; + } + + ret = OMPI_MTL_CALL(del_procs(ompi_mtl, nprocs, procs, endpoints)); + if (OMPI_SUCCESS != ret) { + free(endpoints); + return ret; + } + + free(endpoints); + return OMPI_SUCCESS; +} + + +/* print any available useful information from this communicator */ +int +mca_pml_cm_dump(struct ompi_communicator_t* comm, int verbose) +{ + return OMPI_ERR_NOT_IMPLEMENTED; +} + + +void +mca_pml_cm_request_completion(struct mca_mtl_request_t *mtl_request) +{ + mca_pml_base_request_t *base_request = + (mca_pml_base_request_t*) mtl_request->ompi_req; + + switch (base_request->req_type) { + case MCA_PML_REQUEST_SEND: + { + mca_pml_cm_send_request_t* sendreq = + (mca_pml_cm_send_request_t*) base_request; + MCA_PML_CM_SEND_REQUEST_PML_COMPLETE(sendreq); + } + break; + case MCA_PML_REQUEST_RECV: + { + mca_pml_cm_recv_request_t* recvreq = + (mca_pml_cm_recv_request_t*) base_request; + MCA_PML_CM_RECV_REQUEST_PML_COMPLETE(recvreq); + } + break; + default: + break; + } +} diff --git a/ompi/mca/pml/cm/pml_cm.h b/ompi/mca/pml/cm/pml_cm.h new file mode 100644 index 0000000000..bb7911856f --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_CM_H +#define PML_CM_H + +#include "ompi/class/ompi_free_list.h" +#include "opal/util/cmd_line.h" +#include "ompi/request/request.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/pml/base/base.h" +#include "ompi/datatype/datatype.h" +#include "ompi/datatype/convertor.h" +#include "opal/class/opal_free_list.h" +#include "ompi/mca/mtl/mtl.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +struct ompi_pml_cm_t { + mca_pml_base_module_t super; + /** free list of send request structures */ + ompi_free_list_t cm_send_requests; + /** free list of recv request structures */ + ompi_free_list_t cm_recv_requests; +}; +typedef struct ompi_pml_cm_t ompi_pml_cm_t; +extern ompi_pml_cm_t ompi_pml_cm; + +extern mca_pml_base_component_1_0_0_t mca_pml_cm_component; + + +/* PML interface functions */ +extern int mca_pml_cm_add_procs(struct ompi_proc_t **procs, size_t nprocs); +extern int mca_pml_cm_del_procs(struct ompi_proc_t **procs, size_t nprocs); + +extern int mca_pml_cm_enable(bool enable); +extern int mca_pml_cm_progress(void); + +extern int mca_pml_cm_add_comm(struct ompi_communicator_t* comm); +extern int mca_pml_cm_del_comm(struct ompi_communicator_t* comm); + +extern int mca_pml_cm_irecv_init(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_cm_irecv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_cm_recv(void *buf, + size_t count, + ompi_datatype_t *datatype, + int src, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status ); + +extern int mca_pml_cm_isend_init(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct 
ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_cm_isend(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm, + struct ompi_request_t **request); + +extern int mca_pml_cm_send(void *buf, + size_t count, + ompi_datatype_t *datatype, + int dst, + int tag, + mca_pml_base_send_mode_t mode, + struct ompi_communicator_t* comm); + +extern int mca_pml_cm_iprobe(int dst, + int tag, + struct ompi_communicator_t* comm, + int *matched, + ompi_status_public_t* status); + +extern int mca_pml_cm_probe(int dst, + int tag, + struct ompi_communicator_t* comm, + ompi_status_public_t* status); + +extern int mca_pml_cm_start(size_t count, ompi_request_t** requests); + + +extern int mca_pml_cm_dump(struct ompi_communicator_t* comm, + int verbose); + +extern int mca_pml_cm_cancel(struct ompi_request_t *request, int flag); + +extern void mca_pml_cm_request_completion(struct mca_mtl_request_t *mtl_request); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif /* PML_CM_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/pml/cm/pml_cm_cancel.c b/ompi/mca/pml/cm/pml_cm_cancel.c new file mode 100644 index 0000000000..fb59d5b8e1 --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_cancel.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/request/request.h" +#include "ompi/mca/pml/base/pml_base_request.h" + +#include "pml_cm.h" +#include "pml_cm_sendreq.h" +#include "pml_cm_recvreq.h" + +int +mca_pml_cm_cancel(struct ompi_request_t *request, int flag) +{ + int ret; + mca_pml_base_request_t *base_request = + (mca_pml_base_request_t*) request; + + switch (base_request->req_type) { + case MCA_PML_REQUEST_SEND: + { + mca_pml_cm_send_request_t* sendreq = + (mca_pml_cm_send_request_t*) request; + ret = OMPI_MTL_CALL(cancel(ompi_mtl, + &sendreq->req_mtl, + flag)); + } + break; + case MCA_PML_REQUEST_RECV: + { + mca_pml_cm_recv_request_t* recvreq = + (mca_pml_cm_recv_request_t*) request; + ret = OMPI_MTL_CALL(cancel(ompi_mtl, + &recvreq->req_mtl, + flag)); + } + break; + default: + ret = OMPI_SUCCESS; + } + + return ret; +} diff --git a/ompi/mca/pml/cm/pml_cm_component.c b/ompi/mca/pml/cm/pml_cm_component.c new file mode 100644 index 0000000000..d00fdd4d64 --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_component.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "pml_cm.h" +#include "opal/event/event.h" +#include "opal/mca/base/mca_base_param.h" +#include "ompi/datatype/convertor.h" +#include "ompi/mca/mtl/mtl.h" +#include "ompi/mca/mtl/base/base.h" +#include "ompi/mca/pml/base/pml_base_bsend.h" + +#include "pml_cm_sendreq.h" +#include "pml_cm_recvreq.h" + +static int mca_pml_cm_component_open(void); +static int mca_pml_cm_component_close(void); +static mca_pml_base_module_t* mca_pml_cm_component_init( int* priority, + bool enable_progress_threads, bool enable_mpi_threads); +static int mca_pml_cm_component_fini(void); + + +mca_pml_base_component_1_0_0_t mca_pml_cm_component = { + + /* First, the mca_base_component_t struct containing meta + * information about the component itself */ + + { + /* Indicate that we are a pml v1.0.0 component (which also implies + * a specific MCA version) */ + + MCA_PML_BASE_VERSION_1_0_0, + + "cm", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + mca_pml_cm_component_open, /* component open */ + mca_pml_cm_component_close /* component close */ + }, + + /* Next the MCA v1.0.0 component meta data */ + + { + /* Whether the component is checkpointable or not */ + false + }, + + mca_pml_cm_component_init, /* component init */ + mca_pml_cm_component_fini /* component finalize */ +}; + + +static int +mca_pml_cm_component_open(void) +{ + int ret; + + ret = ompi_mtl_base_open(); + if (OMPI_SUCCESS != ret) return ret; + + /* BWB - FIX ME - register MCA parameters here */ + + return OMPI_SUCCESS; +} + + +static int +mca_pml_cm_component_close(void) +{ + return ompi_mtl_base_close(); +} + + +static mca_pml_base_module_t* +mca_pml_cm_component_init(int* priority, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + int ret; + + 
*priority = 1; + + /* find a useable MTL */ + ret = ompi_mtl_base_select(enable_progress_threads, enable_mpi_threads); + if (OMPI_SUCCESS != ret) return NULL; + + /* update our tag / context id max values based on MTL + information */ + ompi_pml_cm.super.pml_max_contextid = ompi_mtl->mtl_max_contextid; + ompi_pml_cm.super.pml_max_tag = ompi_mtl->mtl_max_tag; + + /* BWB - FIX ME - add mca parameters for free list water marks */ + OBJ_CONSTRUCT(&ompi_pml_cm.cm_send_requests, ompi_free_list_t); + ompi_free_list_init(&ompi_pml_cm.cm_send_requests, + sizeof(mca_pml_cm_send_request_t) + + ompi_mtl->mtl_request_size, + OBJ_CLASS(mca_pml_cm_send_request_t), + 1, -1, 1, + NULL); + + OBJ_CONSTRUCT(&ompi_pml_cm.cm_recv_requests, ompi_free_list_t); + ompi_free_list_init(&ompi_pml_cm.cm_recv_requests, + sizeof(mca_pml_cm_recv_request_t) + + ompi_mtl->mtl_request_size, + OBJ_CLASS(mca_pml_cm_recv_request_t), + 1, -1, 1, + NULL); + + /* initialize buffered send code */ + if(OMPI_SUCCESS != mca_pml_base_bsend_init(enable_mpi_threads)) { + opal_output(0, "mca_pml_ob1_component_init: mca_pml_bsend_init failed\n"); + return NULL; + } + + + return &ompi_pml_cm.super; +} + + +static int +mca_pml_cm_component_fini(void) +{ + /* shut down buffered send code */ + mca_pml_base_bsend_fini(); + + OBJ_DESTRUCT(&ompi_pml_cm.cm_send_requests); + OBJ_DESTRUCT(&ompi_pml_cm.cm_recv_requests); + + if (NULL != ompi_mtl && NULL != ompi_mtl->mtl_finalize) { + return ompi_mtl->mtl_finalize(ompi_mtl); + } + + return OMPI_SUCCESS; +} + diff --git a/ompi/mca/pml/cm/pml_cm_probe.c b/ompi/mca/pml/cm/pml_cm_probe.c new file mode 100644 index 0000000000..8889ea95e9 --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_probe.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/runtime/opal_progress.h" +#include "ompi/request/request.h" +#include "ompi/mca/mtl/mtl.h" + +#include "pml_cm.h" + +int +mca_pml_cm_iprobe(int src, int tag, + struct ompi_communicator_t *comm, + int *matched, ompi_status_public_t * status) +{ + return OMPI_MTL_CALL(iprobe(ompi_mtl, + comm, src, tag, + matched, status)); +} + + +int +mca_pml_cm_probe(int src, int tag, + struct ompi_communicator_t *comm, + ompi_status_public_t * status) +{ + int ret, matched = 0; + + while (true) { + ret = OMPI_MTL_CALL(iprobe(ompi_mtl, + comm, src, tag, + &matched, status)); + if (OMPI_SUCCESS != ret) break; + if (matched) break; + opal_progress(); + } + + return ret; +} diff --git a/ompi/mca/pml/cm/pml_cm_proc.c b/ompi/mca/pml/cm/pml_cm_proc.c new file mode 100644 index 0000000000..709ac454fe --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_proc.c @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/sys/atomic.h" +#include "pml_cm.h" +#include "pml_cm_proc.h" + + +static void mca_pml_cm_proc_construct(mca_pml_cm_proc_t* proc) +{ + proc->base.proc_ompi = NULL; + OBJ_CONSTRUCT(&proc->base.proc_lock, opal_mutex_t); +} + + +static void mca_pml_cm_proc_destruct(mca_pml_cm_proc_t* proc) +{ + OBJ_DESTRUCT(&proc->base.proc_lock); +} + + +OBJ_CLASS_INSTANCE( + mca_pml_cm_proc_t, + opal_list_item_t, + mca_pml_cm_proc_construct, + mca_pml_cm_proc_destruct +); + diff --git a/ompi/mca/pml/cm/pml_cm_proc.h b/ompi/mca/pml/cm/pml_cm_proc.h new file mode 100644 index 0000000000..27f59cfdcf --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_proc.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
/* ---- pml_cm_proc.h ---- */

/**
 * @file
 *
 * Declares mca_pml_cm_proc_t, the CM PML's per-peer structure, and its
 * class descriptor.
 */
#ifndef MCA_PML_CM_PROC_H
#define MCA_PML_CM_PROC_H

#include "opal/threads/mutex.h"
#include "ompi/communicator/communicator.h"
#include "ompi/group/group.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/mtl/mtl.h"

struct mca_mtl_base_procinfo_t;

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

/**
 * Per-peer PML structure.  It currently holds nothing beyond the
 * generic mca_pml_proc_t base.
 */
struct mca_pml_cm_proc_t {
    mca_pml_proc_t base;
};
typedef struct mca_pml_cm_proc_t mca_pml_cm_proc_t;
OMPI_COMP_EXPORT extern opal_class_t mca_pml_cm_proc_t_class;

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif


/* ---- pml_cm_progress.c ---- */

/**
 * PML progress entry point.  The CM PML does no work here and always
 * returns 0.
 */
int
mca_pml_cm_progress(void)
{
    return 0;
}
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "ompi_config.h" + +#include "ompi/request/request.h" +#include "ompi/datatype/datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/convertor.h" + +#include "pml_cm.h" +#include "pml_cm_recvreq.h" + +int +mca_pml_cm_irecv_init(void *addr, + size_t count, + ompi_datatype_t * datatype, + int src, + int tag, + struct ompi_communicator_t *comm, + struct ompi_request_t **request) +{ + int ret; + mca_pml_cm_recv_request_t *recvreq; + + MCA_PML_CM_RECV_REQUEST_ALLOC(recvreq, ret); + if (NULL == recvreq || OMPI_SUCCESS != ret) return ret; + + MCA_PML_CM_RECV_REQUEST_INIT(recvreq, addr, count, + datatype, src, tag, comm, true); + *request = (ompi_request_t*) recvreq; + + return OMPI_SUCCESS; +} + + +int +mca_pml_cm_irecv(void *addr, + size_t count, + ompi_datatype_t * datatype, + int src, + int tag, + struct ompi_communicator_t *comm, + struct ompi_request_t **request) +{ + int ret; + mca_pml_cm_recv_request_t *recvreq; + + MCA_PML_CM_RECV_REQUEST_ALLOC(recvreq, ret); + if (NULL == recvreq || OMPI_SUCCESS != ret) return ret; + + MCA_PML_CM_RECV_REQUEST_INIT(recvreq, addr, count, + datatype, src, tag, comm, false); + + MCA_PML_CM_RECV_REQUEST_START(recvreq, ret); + + if (OMPI_SUCCESS == ret) *request = (ompi_request_t*) recvreq; + + return ret; +} + + +int +mca_pml_cm_recv(void *buf, + size_t count, + ompi_datatype_t * datatype, + int src, + int tag, + struct ompi_communicator_t *comm, + ompi_status_public_t * status) +{ + int ret; + mca_pml_cm_recv_request_t *recvreq; + + MCA_PML_CM_RECV_REQUEST_ALLOC(recvreq, ret); + if (NULL == recvreq || OMPI_SUCCESS != ret) return ret; + + MCA_PML_CM_RECV_REQUEST_INIT(recvreq, buf, count, + datatype, src, tag, comm, false); + + MCA_PML_CM_RECV_REQUEST_START(recvreq, ret); + if (OMPI_SUCCESS != ret) { + /* BWB - XXX - need cleanup of request here */ + MCA_PML_CM_RECV_REQUEST_RETURN(recvreq); + } + + if 
(recvreq->req_recv.req_base.req_ompi.req_complete == false) { + /* give up and sleep until completion */ + if (opal_using_threads()) { + opal_mutex_lock(&ompi_request_lock); + ompi_request_waiting++; + while (recvreq->req_recv.req_base.req_ompi.req_complete == false) + opal_condition_wait(&ompi_request_cond, &ompi_request_lock); + ompi_request_waiting--; + opal_mutex_unlock(&ompi_request_lock); + } else { + ompi_request_waiting++; + while (recvreq->req_recv.req_base.req_ompi.req_complete == false) + opal_condition_wait(&ompi_request_cond, &ompi_request_lock); + ompi_request_waiting--; + } + } + + if (NULL != status) { /* return status */ + *status = recvreq->req_recv.req_base.req_ompi.req_status; + } + ret = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; + ompi_request_free( (ompi_request_t**)&recvreq ); + + return ret; +} + diff --git a/ompi/mca/pml/cm/pml_cm_recvreq.c b/ompi/mca/pml/cm/pml_cm_recvreq.c new file mode 100644 index 0000000000..bea7644782 --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_recvreq.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "pml_cm.h" +#include "pml_cm_recvreq.h" + + +static int +mca_pml_cm_recv_request_free(struct ompi_request_t** request) +{ + mca_pml_cm_recv_request_t* recvreq = *(mca_pml_cm_recv_request_t**)request; + + assert( false == recvreq->req_recv.req_base.req_free_called ); + + OPAL_THREAD_LOCK(&ompi_request_lock); + recvreq->req_recv.req_base.req_free_called = true; + if( true == recvreq->req_recv.req_base.req_pml_complete ) { + MCA_PML_CM_RECV_REQUEST_RETURN( recvreq ); + } + + OPAL_THREAD_UNLOCK(&ompi_request_lock); + + *request = MPI_REQUEST_NULL; + return OMPI_SUCCESS; +} + + +static void +recvreq_construct(mca_pml_cm_recv_request_t* recvreq) +{ + recvreq->req_mtl.ompi_req = (ompi_request_t*) recvreq; + recvreq->req_mtl.completion_callback = mca_pml_cm_request_completion; + + recvreq->req_recv.req_base.req_ompi.req_free = mca_pml_cm_recv_request_free; + recvreq->req_recv.req_base.req_ompi.req_cancel = mca_pml_cm_cancel; +} + + +static void +recvreq_destruct(mca_pml_cm_recv_request_t* recvreq) +{ + recvreq->req_mtl.ompi_req = NULL; + recvreq->req_mtl.completion_callback = NULL; +} + + +OBJ_CLASS_INSTANCE(mca_pml_cm_recv_request_t, + mca_pml_base_recv_request_t, + recvreq_construct, + recvreq_destruct); diff --git a/ompi/mca/pml/cm/pml_cm_recvreq.h b/ompi/mca/pml/cm/pml_cm_recvreq.h new file mode 100644 index 0000000000..f65691ce51 --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_recvreq.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_CM_RECVREQ_H +#define PML_CM_RECVREQ_H + +#include "ompi/mca/pml/base/pml_base_recvreq.h" +#include "ompi/mca/mtl/mtl.h" + +struct mca_pml_cm_recv_request_t { + mca_pml_base_recv_request_t req_recv; + mca_mtl_request_t req_mtl; +}; +typedef struct mca_pml_cm_recv_request_t mca_pml_cm_recv_request_t; +OBJ_CLASS_DECLARATION(mca_pml_cm_recv_request_t); + + +/** + * Allocate a recv request from the modules free list. + * + * @param rc (OUT) OMPI_SUCCESS or error status on failure. + * @return Receive request. + */ +#define MCA_PML_CM_RECV_REQUEST_ALLOC(recvreq, rc) \ +do { \ + opal_list_item_t *item; \ + OMPI_FREE_LIST_GET(&ompi_pml_cm.cm_recv_requests, item, rc); \ + recvreq = (mca_pml_cm_recv_request_t*) item; \ + } while (0) + + +/** + * Initialize a receive request with call parameters. + * + * @param request (IN) Receive request. + * @param addr (IN) User buffer. + * @param count (IN) Number of elements of indicated datatype. + * @param datatype (IN) User defined datatype. + * @param src (IN) Source rank w/in the communicator. + * @param tag (IN) User defined tag. + * @param comm (IN) Communicator. + * @param persistent (IN) Is this a ersistent request. 
+ */ +#define MCA_PML_CM_RECV_REQUEST_INIT( request, \ + addr, \ + count, \ + datatype, \ + src, \ + tag, \ + comm, \ + persistent) \ +do { \ + MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv, \ + addr, \ + count, \ + datatype, \ + src, \ + tag, \ + comm, \ + persistent); \ + /* BWB - fix me - need real remote proc */ \ + if (MPI_ANY_SOURCE == src) { \ + (request)->req_recv.req_base.req_proc = \ + comm->c_pml_procs[comm->c_my_rank]->proc_ompi; \ + } else { \ + (request)->req_recv.req_base.req_proc = \ + comm->c_pml_procs[src]->proc_ompi; \ + } \ + \ + ompi_convertor_copy_and_prepare_for_recv( \ + (request)->req_recv.req_base.req_proc->proc_convertor, \ + (request)->req_recv.req_base.req_datatype, \ + (request)->req_recv.req_base.req_count, \ + (request)->req_recv.req_base.req_addr, \ + 0, \ + &(request)->req_recv.req_convertor ); \ +} while(0) + + +/** + * Start an initialized request. + * + * @param request Receive request. + * @return OMPI_SUCESS or error status on failure. + */ +#define MCA_PML_CM_RECV_REQUEST_START(request, ret) \ +do { \ + /* init/re-init the request */ \ + MCA_PML_BASE_RECV_START( &(request)->req_recv.req_base ); \ + ret = OMPI_MTL_CALL(irecv(ompi_mtl, \ + recvreq->req_recv.req_base.req_comm, \ + recvreq->req_recv.req_base.req_peer, \ + recvreq->req_recv.req_base.req_tag, \ + &recvreq->req_recv.req_convertor, \ + &recvreq->req_mtl)); \ +} while (0) + + +/** + * Mark the request as completed at MPI level for internal purposes. + * + * @param recvreq (IN) Receive request. + */ +#define MCA_PML_CM_RECV_REQUEST_MPI_COMPLETE( recvreq ) \ +do { \ + MCA_PML_BASE_REQUEST_MPI_COMPLETE( &(recvreq->req_recv.req_base.req_ompi) ); \ + } while (0) + + +/** + * Return a recv request to the modules free list. + * + * @param recvreq (IN) Receive request. 
+ */ +#define MCA_PML_CM_RECV_REQUEST_PML_COMPLETE(recvreq) \ +do { \ + assert( false == recvreq->req_recv.req_base.req_pml_complete ); \ + \ + OPAL_THREAD_LOCK(&ompi_request_lock); \ + \ + if( true == recvreq->req_recv.req_base.req_free_called ) { \ + MCA_PML_CM_RECV_REQUEST_RETURN( recvreq ); \ + } else { \ + /* initialize request status */ \ + recvreq->req_recv.req_base.req_pml_complete = true; \ + MCA_PML_CM_RECV_REQUEST_MPI_COMPLETE( recvreq ); \ + } \ + OPAL_THREAD_UNLOCK(&ompi_request_lock); \ + } while(0) + + +/** + * Free the PML receive request + */ +#define MCA_PML_CM_RECV_REQUEST_RETURN(recvreq) \ +{ \ + MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \ + OMPI_FREE_LIST_RETURN( &ompi_pml_cm.cm_recv_requests, \ + (opal_list_item_t*)(recvreq)); \ +} + + +#endif diff --git a/ompi/mca/pml/cm/pml_cm_send.c b/ompi/mca/pml/cm/pml_cm_send.c new file mode 100644 index 0000000000..39fa164ed3 --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_send.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/datatype/datatype.h" +#include "ompi/communicator/communicator.h" +#include "ompi/datatype/convertor.h" + +#include "pml_cm.h" +#include "pml_cm_sendreq.h" + +int +mca_pml_cm_isend_init(void* buf, + size_t count, + ompi_datatype_t* datatype, + int dst, + int tag, + mca_pml_base_send_mode_t sendmode, + ompi_communicator_t* comm, + ompi_request_t** request) +{ + int ret; + mca_pml_cm_send_request_t *sendreq; + + MCA_PML_CM_SEND_REQUEST_ALLOC(comm, dst, sendreq, ret); + if (NULL == sendreq || OMPI_SUCCESS != ret) return ret; + + MCA_PML_CM_SEND_REQUEST_INIT(sendreq, buf, count, + datatype, dst, tag, comm, + sendmode, false, true); + *request = (ompi_request_t*) sendreq; + + return OMPI_SUCCESS; +} + + +int +mca_pml_cm_isend(void* buf, + size_t count, + ompi_datatype_t* datatype, + int dst, + int tag, + mca_pml_base_send_mode_t sendmode, + ompi_communicator_t* comm, + ompi_request_t** request) +{ + int ret; + mca_pml_cm_send_request_t *sendreq; + + MCA_PML_CM_SEND_REQUEST_ALLOC(comm, dst, sendreq, ret); + if (NULL == sendreq || OMPI_SUCCESS != ret) return ret; + + MCA_PML_CM_SEND_REQUEST_INIT(sendreq, buf, count, + datatype, dst, tag, comm, + sendmode, false, false); + + MCA_PML_CM_SEND_REQUEST_START(sendreq, ret); + + if (OMPI_SUCCESS == ret) *request = (ompi_request_t*) sendreq; + + return ret; +} + + +int +mca_pml_cm_send(void *buf, + size_t count, + ompi_datatype_t* datatype, + int dst, + int tag, + mca_pml_base_send_mode_t sendmode, + ompi_communicator_t* comm) +{ + int ret; + mca_pml_cm_send_request_t *sendreq; + + MCA_PML_CM_SEND_REQUEST_ALLOC(comm, dst, sendreq, ret); + if (NULL == sendreq || OMPI_SUCCESS != ret) return ret; + + MCA_PML_CM_SEND_REQUEST_INIT(sendreq, buf, count, + datatype, dst, tag, comm, + sendmode, true, false); + + MCA_PML_CM_SEND_REQUEST_START(sendreq, ret); + if (OMPI_SUCCESS != ret) { + 
MCA_PML_CM_SEND_REQUEST_RETURN(sendreq); + return ret; + } + + if (sendreq->req_send.req_base.req_ompi.req_complete == false) { + /* give up and sleep until completion */ + if (opal_using_threads()) { + opal_mutex_lock(&ompi_request_lock); + ompi_request_waiting++; + while (sendreq->req_send.req_base.req_ompi.req_complete == false) + opal_condition_wait(&ompi_request_cond, &ompi_request_lock); + ompi_request_waiting--; + opal_mutex_unlock(&ompi_request_lock); + } else { + ompi_request_waiting++; + while (sendreq->req_send.req_base.req_ompi.req_complete == false) + opal_condition_wait(&ompi_request_cond, &ompi_request_lock); + ompi_request_waiting--; + } + } + + ompi_request_free( (ompi_request_t**)&sendreq ); + + return ret; +} + diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.c b/ompi/mca/pml/cm/pml_cm_sendreq.c new file mode 100644 index 0000000000..0458197491 --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_sendreq.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "pml_cm.h" +#include "pml_cm_sendreq.h" + + +/* + * The free call mark the final stage in a request + * life-cycle. Starting from this point the request is completed at + * both PML and user level, and can be used for others p2p + * communications. Therefore, in the case of the CM PML it should be + * added to the free request list. 
+ */ +static int +mca_pml_cm_send_request_free(struct ompi_request_t** request) +{ + mca_pml_cm_send_request_t* sendreq = *(mca_pml_cm_send_request_t**)request; + + assert( false == sendreq->req_send.req_base.req_free_called ); + + OPAL_THREAD_LOCK(&ompi_request_lock); + sendreq->req_send.req_base.req_free_called = true; + if( true == sendreq->req_send.req_base.req_pml_complete ) { + MCA_PML_CM_SEND_REQUEST_RETURN( sendreq ); + } + + OPAL_THREAD_UNLOCK(&ompi_request_lock); + + *request = MPI_REQUEST_NULL; + + return OMPI_SUCCESS; +} + + +static void +sendreq_construct(mca_pml_cm_send_request_t* sendreq) +{ + sendreq->req_mtl.ompi_req = (ompi_request_t*) sendreq; + sendreq->req_mtl.completion_callback = mca_pml_cm_request_completion; + + sendreq->req_send.req_base.req_ompi.req_free = mca_pml_cm_send_request_free; + sendreq->req_send.req_base.req_ompi.req_cancel = mca_pml_cm_cancel; +} + + +static void +sendreq_destruct(mca_pml_cm_send_request_t* sendreq) +{ + sendreq->req_mtl.ompi_req = NULL; + sendreq->req_mtl.completion_callback = NULL; +} + + +OBJ_CLASS_INSTANCE(mca_pml_cm_send_request_t, + mca_pml_base_send_request_t, + sendreq_construct, + sendreq_destruct); diff --git a/ompi/mca/pml/cm/pml_cm_sendreq.h b/ompi/mca/pml/cm/pml_cm_sendreq.h new file mode 100644 index 0000000000..2c557983d8 --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_sendreq.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef PML_CM_SENDREQ_H +#define PML_CM_SENDREQ_H + +#include "ompi/mca/pml/base/pml_base_sendreq.h" +#include "ompi/mca/pml/base/pml_base_bsend.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/mtl/mtl.h" + +struct mca_pml_cm_send_request_t { + mca_pml_base_send_request_t req_send; + mca_mtl_request_t req_mtl; + bool req_blocking; +}; +typedef struct mca_pml_cm_send_request_t mca_pml_cm_send_request_t; +OBJ_CLASS_DECLARATION(mca_pml_cm_send_request_t); + + +#define MCA_PML_CM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc) \ +{ \ + ompi_proc_t *proc = \ + comm->c_pml_procs[dst]->proc_ompi; \ + opal_list_item_t* item; \ + \ + if(NULL == proc) { \ + rc = OMPI_ERR_OUT_OF_RESOURCE; \ + } else { \ + rc = OMPI_SUCCESS; \ + OMPI_FREE_LIST_WAIT(&ompi_pml_cm.cm_send_requests, item, rc); \ + sendreq = (mca_pml_cm_send_request_t*)item; \ + sendreq->req_send.req_base.req_proc = proc; \ + } \ +} + + +#define MCA_PML_CM_SEND_REQUEST_INIT( sendreq, \ + buf, \ + count, \ + datatype, \ + dst, \ + tag, \ + comm, \ + sendmode, \ + blocking, \ + persistent) \ +{ \ + MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \ + buf, \ + count, \ + datatype, \ + dst, \ + tag, \ + comm, \ + sendmode, \ + persistent); \ + /* BWB - XXX - fix me later */ \ + if (count == 0) { \ + ompi_convertor_copy_and_prepare_for_send( \ + (sendreq)->req_send.req_base.req_proc->proc_convertor, \ + (sendreq)->req_send.req_base.req_datatype, \ + (sendreq)->req_send.req_base.req_count, \ + (sendreq)->req_send.req_base.req_addr, \ + 0, \ + &(sendreq)->req_send.req_convertor ); \ + ompi_convertor_get_packed_size( &(sendreq)->req_send.req_convertor, \ + &((sendreq)->req_send.req_bytes_packed) ); \ + } \ + \ + sendreq->req_blocking = blocking; \ +} + + +#define MCA_PML_CM_SEND_REQUEST_START(sendreq, ret) \ +do { \ + MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); \ + ret = OMPI_SUCCESS; \ + if (sendreq->req_send.req_send_mode == 
MCA_PML_BASE_SEND_BUFFERED) { \ + ret = mca_pml_base_bsend_request_alloc(&sendreq->req_send.req_base.req_ompi); \ + if (OMPI_SUCCESS == ret) { \ + ret =mca_pml_base_bsend_request_start(&sendreq->req_send.req_base.req_ompi); \ + } \ + } \ + if (OMPI_SUCCESS == ret) { \ + ret = OMPI_MTL_CALL(isend(ompi_mtl, \ + sendreq->req_send.req_base.req_comm, \ + sendreq->req_send.req_base.req_peer, \ + sendreq->req_send.req_base.req_tag, \ + &sendreq->req_send.req_convertor, \ + sendreq->req_send.req_send_mode, \ + sendreq->req_blocking, \ + &sendreq->req_mtl)); \ + } \ + } while (0) + + +/* + * Mark a send request as completed at the MPI level. + */ +#define MCA_PML_CM_SEND_REQUEST_MPI_COMPLETE(sendreq) \ +do { \ + (sendreq)->req_send.req_base.req_ompi.req_status.MPI_SOURCE = \ + (sendreq)->req_send.req_base.req_comm->c_my_rank; \ + (sendreq)->req_send.req_base.req_ompi.req_status.MPI_TAG = \ + (sendreq)->req_send.req_base.req_tag; \ + (sendreq)->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \ + (sendreq)->req_send.req_base.req_ompi.req_status._count = \ + (sendreq)->req_send.req_bytes_packed; \ + MCA_PML_BASE_REQUEST_MPI_COMPLETE( &((sendreq)->req_send.req_base.req_ompi) ); \ + } while(0) + + +/* + * The PML has completed a send request. Note that this request + * may have been orphaned by the user or have already completed + * at the MPI level. + * This macro will never be called directly from the upper level, as it should + * only be an internal call to the PML. 
+ */ +#define MCA_PML_CM_SEND_REQUEST_PML_COMPLETE(sendreq) \ +do { \ + assert( false == sendreq->req_send.req_base.req_pml_complete ); \ + \ + if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \ + mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); \ + } \ + \ + OPAL_THREAD_LOCK(&ompi_request_lock); \ + if( false == sendreq->req_send.req_base.req_ompi.req_complete ) { \ + /* Should only be called for long messages (maybe synchronous) */ \ + MCA_PML_CM_SEND_REQUEST_MPI_COMPLETE(sendreq); \ + } \ + sendreq->req_send.req_base.req_pml_complete = true; \ + \ + if( sendreq->req_send.req_base.req_free_called ) { \ + MCA_PML_CM_SEND_REQUEST_RETURN( sendreq ); \ + } else { \ + if(sendreq->req_send.req_base.req_ompi.req_persistent) { \ + /* rewind convertor */ \ + size_t offset = 0; \ + ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset); \ + } \ + } \ + OPAL_THREAD_UNLOCK(&ompi_request_lock); \ + } while (0) + + +/* + * Release resources associated with a request + */ +#define MCA_PML_CM_SEND_REQUEST_RETURN(sendreq) \ +{ \ + /* Let the base handle the reference counts */ \ + MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \ + OMPI_FREE_LIST_RETURN( \ + &ompi_pml_cm.cm_send_requests, (opal_list_item_t*)sendreq); \ +} + +#endif diff --git a/ompi/mca/pml/cm/pml_cm_start.c b/ompi/mca/pml/cm/pml_cm_start.c new file mode 100644 index 0000000000..bb25e14a0d --- /dev/null +++ b/ompi/mca/pml/cm/pml_cm_start.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2006 The Regents of the University of California. 
+ * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "ompi/mca/pml/base/pml_base_request.h" +#include "ompi/mca/pml/base/pml_base_sendreq.h" +#include "ompi/mca/pml/base/pml_base_recvreq.h" + +#include "pml_cm.h" +#include "pml_cm_sendreq.h" +#include "pml_cm_recvreq.h" + + +int +mca_pml_cm_start(size_t count, ompi_request_t** requests) +{ + int rc; + size_t i; + + for (i = 0 ; i < count ; i++) { + mca_pml_base_request_t *pml_request = + (mca_pml_base_request_t*)requests[i]; + if (NULL == pml_request) continue; + + /* If the persistent request is currently active - obtain the + * request lock and verify the status is incomplete. If the + * pml layer has not completed the request - mark the request + * as free called - so that it will be freed when the request + * completes - and create a new request. + */ + switch (pml_request->req_ompi.req_state) { + case OMPI_REQUEST_INACTIVE: + if (pml_request->req_pml_complete == true) + break; + + case OMPI_REQUEST_ACTIVE: { + /* otherwise fall through */ + ompi_request_t *request; + OPAL_THREAD_LOCK(&ompi_request_lock); + if (pml_request->req_pml_complete == false) { + /* free request after it completes */ + pml_request->req_free_called = true; + } else { + /* can reuse the existing request */ + OPAL_THREAD_UNLOCK(&ompi_request_lock); + break; + } + + /* allocate a new request */ + switch (pml_request->req_type) { + case MCA_PML_REQUEST_SEND: { + mca_pml_base_send_mode_t sendmode = + ((mca_pml_base_send_request_t*)pml_request)->req_send_mode; + rc = mca_pml_cm_isend_init( + pml_request->req_addr, + pml_request->req_count, + pml_request->req_datatype, + pml_request->req_peer, + pml_request->req_tag, + sendmode, + pml_request->req_comm, + &request); + break; + } + case MCA_PML_REQUEST_RECV: + rc = mca_pml_cm_irecv_init( + pml_request->req_addr, + pml_request->req_count, + pml_request->req_datatype, + pml_request->req_peer, + 
pml_request->req_tag, + pml_request->req_comm, + &request); + break; + default: + rc = OMPI_ERR_REQUEST; + break; + } + OPAL_THREAD_UNLOCK(&ompi_request_lock); + if(OMPI_SUCCESS != rc) + return rc; + pml_request = (mca_pml_base_request_t*)request; + requests[i] = request; + break; + } + default: + return OMPI_ERR_REQUEST; + } + + /* start the request */ + switch (pml_request->req_type) { + case MCA_PML_REQUEST_SEND: + { + mca_pml_cm_send_request_t* sendreq = + (mca_pml_cm_send_request_t*)pml_request; + MCA_PML_CM_SEND_REQUEST_START(sendreq, rc); + if(rc != OMPI_SUCCESS) + return rc; + break; + } + case MCA_PML_REQUEST_RECV: + { + mca_pml_cm_recv_request_t* recvreq = + (mca_pml_cm_recv_request_t*)pml_request; + MCA_PML_CM_RECV_REQUEST_START(recvreq, rc); + if(rc != OMPI_SUCCESS) + return rc; + break; + } + default: + return OMPI_ERR_REQUEST; + } + } + return OMPI_SUCCESS; + +} diff --git a/ompi/mca/pml/dr/pml_dr.c b/ompi/mca/pml/dr/pml_dr.c index a38a74a507..9b0c84dd77 100644 --- a/ompi/mca/pml/dr/pml_dr.c +++ b/ompi/mca/pml/dr/pml_dr.c @@ -113,7 +113,7 @@ int mca_pml_dr_del_comm(ompi_communicator_t* comm) int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs) { ompi_bitmap_t reachable; - struct mca_pml_dr_endpoint_t ** endpoints = NULL; + struct mca_bml_base_endpoint_t **bml_endpoints = NULL; int rc; size_t i; @@ -125,16 +125,16 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs) if(OMPI_SUCCESS != rc) return rc; - endpoints = (struct mca_pml_dr_endpoint_t **) malloc ( nprocs * - sizeof(struct mca_pml_dr_endpoint_t*)); - if ( NULL == endpoints ) { - return OMPI_ERR_OUT_OF_RESOURCE; + bml_endpoints = malloc(nprocs * sizeof(struct mca_bml_base_endpoint_t*)); + if (NULL == bml_endpoints) { + return OMPI_ERR_OUT_OF_RESOURCE; } + /* initialize bml endpoint data */ rc = mca_bml.bml_add_procs( nprocs, procs, - (mca_bml_base_endpoint_t**) endpoints, + bml_endpoints, &reachable ); if(OMPI_SUCCESS != rc) @@ -154,28 +154,34 @@ int 
mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs) mca_pml_dr.free_list_max, mca_pml_dr.free_list_inc, NULL); - - for(i = 0; i < nprocs; i++) { + + /* initialize pml endpoint data */ + for (i = 0 ; i < nprocs ; ++i) { int idx; + mca_pml_dr_endpoint_t *endpoint; + + + endpoint = OBJ_NEW(mca_pml_dr_endpoint_t); + endpoint->src = mca_pml_dr.my_rank; + endpoint->proc_ompi = procs[i]; + procs[i]->proc_pml = (struct mca_pml_base_endpoint_t*) endpoint; + /* this won't work for comm spawn and other dynamic processes, but will work for initial job start */ idx = ompi_pointer_array_add(&mca_pml_dr.endpoints, - (void*) endpoints[i]); + (void*) endpoint); if(orte_ns.compare(ORTE_NS_CMP_ALL, orte_process_info.my_name, - &endpoints[i]->base.super.proc_ompi->proc_name) == 0) { + &(endpoint->proc_ompi->proc_name)) == 0) { mca_pml_dr.my_rank = idx; } - endpoints[i]->local = endpoints[i]->dst = idx; + endpoint->local = endpoint->dst = idx; + endpoint->bml_endpoint = bml_endpoints[i]; } - - for(i = 0; i < nprocs; i++) { - endpoints[i]->src = mca_pml_dr.my_rank; - } - + /* no longer need this */ - if ( NULL != endpoints ) { - free ( endpoints) ; + if ( NULL != bml_endpoints ) { + free ( bml_endpoints) ; } return rc; } @@ -187,6 +193,15 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs) int mca_pml_dr_del_procs(ompi_proc_t** procs, size_t nprocs) { + size_t i; + + /* clean up pml endpoint data */ + for (i = 0 ; i < nprocs ; ++i) { + if (NULL != procs[i]->proc_pml) { + OBJ_RELEASE(procs[i]->proc_pml); + } + } + return mca_bml.bml_del_procs(nprocs, procs); } diff --git a/ompi/mca/pml/dr/pml_dr_comm.c b/ompi/mca/pml/dr/pml_dr_comm.c index 1c4f367f40..acb9278f09 100644 --- a/ompi/mca/pml/dr/pml_dr_comm.c +++ b/ompi/mca/pml/dr/pml_dr_comm.c @@ -96,20 +96,20 @@ int mca_pml_dr_comm_init(mca_pml_dr_comm_t* dr_comm, ompi_communicator_t* ompi_c } for(i=0; iprocs+i; OBJ_CONSTRUCT(proc, mca_pml_dr_comm_proc_t); proc->comm_rank = i; ompi_proc = 
ompi_comm->c_remote_group->grp_proc_pointers[i]; proc->ompi_proc = ompi_proc; - ep = (mca_pml_dr_endpoint_t*) ompi_proc->proc_pml; + pml_ep = (mca_pml_dr_endpoint_t*) ompi_proc->proc_pml; ompi_pointer_array_set_item(&dr_comm->sparse_procs, - ep->dst, /* from our view this is the + pml_ep->dst, /* from our view this is the peers source 'global rank' */ proc); - proc->endpoint = ep; - + proc->pml_endpoint = pml_ep; + proc->bml_endpoint = ompi_proc->proc_bml; } dr_comm->num_procs = size; return OMPI_SUCCESS; diff --git a/ompi/mca/pml/dr/pml_dr_comm.h b/ompi/mca/pml/dr/pml_dr_comm.h index 65c22bafb9..56436901d7 100644 --- a/ompi/mca/pml/dr/pml_dr_comm.h +++ b/ompi/mca/pml/dr/pml_dr_comm.h @@ -46,7 +46,8 @@ struct mca_pml_dr_comm_proc_t { opal_list_t unexpected_frags; /**< unexpected fragment queues */ opal_list_t matched_receives; /**< list of in-progress matched receives */ ompi_proc_t* ompi_proc; /**< back pointer to ompi_proc_t */ - mca_pml_dr_endpoint_t* endpoint; /**< back pointer to the endpoint */ + mca_pml_dr_endpoint_t* pml_endpoint; /**< back pointer to the PML endpoint */ + mca_bml_base_endpoint_t* bml_endpoint; /**< back pointer to the BML endpoint */ int32_t comm_rank; /**< rank in the communicator */ }; typedef struct mca_pml_dr_comm_proc_t mca_pml_dr_comm_proc_t; diff --git a/ompi/mca/pml/dr/pml_dr_component.c b/ompi/mca/pml/dr/pml_dr_component.c index 1f2d60c812..fb54a1ed9c 100644 --- a/ompi/mca/pml/dr/pml_dr_component.c +++ b/ompi/mca/pml/dr/pml_dr_component.c @@ -201,8 +201,7 @@ mca_pml_base_module_t* mca_pml_dr_component_init(int* priority, if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads, - enable_mpi_threads, - OBJ_CLASS(mca_pml_dr_endpoint_t) + enable_mpi_threads )) { return NULL; } diff --git a/ompi/mca/pml/dr/pml_dr_endpoint.c b/ompi/mca/pml/dr/pml_dr_endpoint.c index 973994e457..7da1813da0 100644 --- a/ompi/mca/pml/dr/pml_dr_endpoint.c +++ b/ompi/mca/pml/dr/pml_dr_endpoint.c @@ -47,7 +47,7 @@ static void 
mca_pml_dr_endpoint_construct(mca_pml_dr_endpoint_t* ep) OBJ_CONSTRUCT(&ep->seq_recvs, ompi_seq_tracker_t); OBJ_CONSTRUCT(&ep->seq_recvs_matched, ompi_seq_tracker_t); ep->vfrag_seq = 0; - ep->base.copy = (mca_bml_base_endpoint_copy_fn_t)mca_pml_dr_endpoint_copy; + ep->bml_endpoint = NULL; } @@ -61,6 +61,6 @@ static void mca_pml_dr_endpoint_destruct(mca_pml_dr_endpoint_t* ep) OBJ_CLASS_INSTANCE( mca_pml_dr_endpoint_t, - mca_bml_base_endpoint_t, + opal_object_t, mca_pml_dr_endpoint_construct, mca_pml_dr_endpoint_destruct); diff --git a/ompi/mca/pml/dr/pml_dr_endpoint.h b/ompi/mca/pml/dr/pml_dr_endpoint.h index d296a267ad..bf0e200a5f 100644 --- a/ompi/mca/pml/dr/pml_dr_endpoint.h +++ b/ompi/mca/pml/dr/pml_dr_endpoint.h @@ -33,7 +33,9 @@ extern "C" { * add whatever else is needed */ struct mca_pml_dr_endpoint_t { - mca_bml_base_endpoint_t base; + opal_object_t super; + ompi_proc_t *proc_ompi; /* back pointer to proc structure */ + mca_bml_base_endpoint_t *bml_endpoint; /* pointer to related bml endpoint */ int32_t local; /* local view of the rank */ int32_t src; /* peers view of the src rank */ int32_t dst; /* peers destination rank */ diff --git a/ompi/mca/pml/dr/pml_dr_recvfrag.c b/ompi/mca/pml/dr/pml_dr_recvfrag.c index eea5e2f662..0a13f5a36e 100644 --- a/ompi/mca/pml/dr/pml_dr_recvfrag.c +++ b/ompi/mca/pml/dr/pml_dr_recvfrag.c @@ -138,8 +138,7 @@ void mca_pml_dr_recv_frag_callback( if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs, hdr->hdr_common.hdr_vid)) { MCA_PML_DR_DEBUG(0,(0, "%s:%d: got a duplicate vfrag vfrag id %d\n", __FILE__, __LINE__, hdr->hdr_common.hdr_vid)); - - mca_pml_dr_recv_frag_ack(&ep->base, + mca_pml_dr_recv_frag_ack(ep->bml_endpoint, &hdr->hdr_common, hdr->hdr_match.hdr_src_ptr.pval, 1, 0); @@ -155,7 +154,7 @@ void mca_pml_dr_recv_frag_callback( assert(hdr->hdr_common.hdr_src < ompi_pointer_array_get_size(&comm->sparse_procs)); proc = ompi_pointer_array_get_item(&comm->sparse_procs, hdr->hdr_common.hdr_src); assert(proc != NULL); - assert(ep 
== proc->endpoint); + assert(ep == proc->pml_endpoint); mca_pml_dr_recv_frag_match(comm,proc,btl,&hdr->hdr_match,segments,des->des_dst_cnt); break; @@ -193,7 +192,7 @@ void mca_pml_dr_recv_frag_callback( if(NULL == ompi_comm) { if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs_matched, hdr->hdr_common.hdr_vid)) { MCA_PML_DR_DEBUG(0, (0, "%s:%d: acking duplicate matched rendezvous from sequence tracker\n", __FILE__, __LINE__)); - mca_pml_dr_recv_frag_ack(&ep->base, + mca_pml_dr_recv_frag_ack(ep->bml_endpoint, &hdr->hdr_common, hdr->hdr_match.hdr_src_ptr.pval, ~(uint64_t) 0, hdr->hdr_rndv.hdr_msg_length); @@ -207,7 +206,7 @@ void mca_pml_dr_recv_frag_callback( assert(hdr->hdr_common.hdr_src < ompi_pointer_array_get_size(&comm->sparse_procs)); proc = ompi_pointer_array_get_item(&comm->sparse_procs, hdr->hdr_common.hdr_src); assert(proc != NULL); - assert(ep == proc->endpoint); + assert(ep == proc->pml_endpoint); /* ack only if the vfrag has been matched */ recvreq = @@ -220,7 +219,7 @@ void mca_pml_dr_recv_frag_callback( } else { if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs_matched, hdr->hdr_common.hdr_vid)) { MCA_PML_DR_DEBUG(0,(0, "%s:%d: acking duplicate matched rendezvous from sequence tracker\n", __FILE__, __LINE__)); - mca_pml_dr_recv_frag_ack(&ep->base, + mca_pml_dr_recv_frag_ack(ep->bml_endpoint, &hdr->hdr_common, hdr->hdr_match.hdr_src_ptr.pval, ~(uint64_t) 0, hdr->hdr_rndv.hdr_msg_length); @@ -239,7 +238,7 @@ void mca_pml_dr_recv_frag_callback( assert(hdr->hdr_common.hdr_src < ompi_pointer_array_get_size(&comm->sparse_procs)); proc = ompi_pointer_array_get_item(&comm->sparse_procs, hdr->hdr_common.hdr_src); assert(proc != NULL); - assert(ep == proc->endpoint); + assert(ep == proc->pml_endpoint); mca_pml_dr_recv_frag_match(comm,proc,btl,&hdr->hdr_match,segments,des->des_dst_cnt); } break; @@ -273,7 +272,7 @@ void mca_pml_dr_recv_frag_callback( /* seq_recvs protected by matching lock */ if(ompi_seq_tracker_check_duplicate(&ep->seq_recvs, 
hdr->hdr_common.hdr_vid)) { MCA_PML_DR_DEBUG(0,(0, "%s:%d: acking duplicate fragment\n", __FILE__, __LINE__)); - mca_pml_dr_recv_frag_ack(&ep->base, + mca_pml_dr_recv_frag_ack(ep->bml_endpoint, &hdr->hdr_common, hdr->hdr_frag.hdr_src_ptr.pval, ~(uint64_t) 0, 0); @@ -287,7 +286,7 @@ void mca_pml_dr_recv_frag_callback( assert(hdr->hdr_common.hdr_src < ompi_pointer_array_get_size(&comm->sparse_procs)); proc = ompi_pointer_array_get_item(&comm->sparse_procs, hdr->hdr_common.hdr_src); assert(proc != NULL); - assert(ep == proc->endpoint); + assert(ep == proc->pml_endpoint); recvreq = hdr->hdr_frag.hdr_dst_ptr.pval; mca_pml_dr_recv_request_progress(recvreq,btl,segments,des->des_dst_cnt); @@ -610,7 +609,7 @@ bool mca_pml_dr_recv_frag_match( ompi_proc_t* ompi_proc = proc->ompi_proc; int rc; uint32_t csum; - mca_pml_dr_endpoint_t* ep = (mca_pml_dr_endpoint_t*) proc->endpoint; + mca_pml_dr_endpoint_t* ep = (mca_pml_dr_endpoint_t*) proc->pml_endpoint; bool do_csum = mca_pml_dr.enable_csum && (btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM); @@ -690,7 +689,7 @@ rematch: } MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum); if(do_csum && csum != hdr->hdr_csum) { - mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_pml, + mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml, &hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0); MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x (segments %d length %d)\n", __FILE__, __LINE__, csum, hdr->hdr_csum, num_segments, @@ -726,7 +725,7 @@ rematch: } MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum); if(do_csum && csum != hdr->hdr_csum) { - mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_pml, + mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml, &hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0); MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x\n", __FILE__, __LINE__, csum, 
hdr->hdr_csum)); @@ -751,7 +750,7 @@ rematch: MCA_PML_DR_DEBUG(1,(0, "%s:%d: received short message, acking now vfrag id: %d\n", __FILE__, __LINE__, hdr->hdr_common.hdr_vid)); - mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_pml, + mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_bml, &hdr->hdr_common, hdr->hdr_src_ptr.pval, 1, 0); } @@ -952,7 +951,7 @@ rematch: * descriptor */ frag->request=match; match->req_proc = proc; - match->req_endpoint = (mca_pml_dr_endpoint_t*)proc->ompi_proc->proc_pml; + match->req_endpoint = proc->pml_endpoint; /* PML endpoint; proc_bml is a mca_bml_base_endpoint_t and must not be cast to it */ /* add this fragment descriptor to the list of * descriptors to be processed later diff --git a/ompi/mca/pml/dr/pml_dr_recvreq.c b/ompi/mca/pml/dr/pml_dr_recvreq.c index 4e26b46182..192c50dd5b 100644 --- a/ompi/mca/pml/dr/pml_dr_recvreq.c +++ b/ompi/mca/pml/dr/pml_dr_recvreq.c @@ -46,7 +46,7 @@ if(do_csum && csum != hdr->hdr_match.hdr_csum) { \ } else { \ mca_pml_dr_recv_request_match_specific(recvreq); \ } \ - mca_pml_dr_recv_frag_ack(&recvreq->req_endpoint->base, \ + mca_pml_dr_recv_frag_ack(recvreq->req_endpoint->bml_endpoint, \ &hdr->hdr_common, \ hdr->hdr_match.hdr_src_ptr.pval, \ 0, 0); \ @@ -175,7 +175,7 @@ void mca_pml_dr_recv_request_ack( bool do_csum; /* allocate descriptor */ - bml_btl = mca_bml_base_btl_array_get_next(&recvreq->req_endpoint->base.btl_eager); + bml_btl = mca_bml_base_btl_array_get_next(&recvreq->req_endpoint->bml_endpoint->btl_eager); do_csum = mca_pml_dr.enable_csum && (bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM); MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t)); diff --git a/ompi/mca/pml/dr/pml_dr_recvreq.h b/ompi/mca/pml/dr/pml_dr_recvreq.h index 23d8966d6e..866dad2797 100644 --- a/ompi/mca/pml/dr/pml_dr_recvreq.h +++ b/ompi/mca/pml/dr/pml_dr_recvreq.h @@ -241,7 +241,7 @@ do { #define MCA_PML_DR_RECV_REQUEST_BYTES_PACKED(request, bytes_packed) \ do { \ bool do_csum = mca_pml_dr.enable_csum && \ - 
(request->req_endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \ + (request->req_endpoint->bml_endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \ (request)->req_recv.req_bytes_packed = bytes_packed; \ if((request)->req_recv.req_bytes_packed != 0) { \ ompi_proc_t *proc = (request)->req_proc->ompi_proc; \ @@ -276,7 +276,7 @@ do { int32_t free_after = 0; \ size_t n, offset = seg_offset; \ bool do_csum = mca_pml_dr.enable_csum && \ - (request->req_endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \ + (request->req_endpoint->bml_endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \ \ for(n=0; nvf_size == bytes_sent) { - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->base.btl_send); + bml_btl = mca_bml_base_btl_array_get_next(&endpoint->bml_endpoint->btl_send); MCA_PML_DR_VFRAG_ALLOC(vfrag,rc); if(NULL == vfrag) { OPAL_THREAD_LOCK(&mca_pml_dr.lock); diff --git a/ompi/mca/pml/dr/pml_dr_sendreq.h b/ompi/mca/pml/dr/pml_dr_sendreq.h index b2458f8461..515352d1e9 100644 --- a/ompi/mca/pml/dr/pml_dr_sendreq.h +++ b/ompi/mca/pml/dr/pml_dr_sendreq.h @@ -100,10 +100,10 @@ OBJ_CLASS_DECLARATION(mca_pml_dr_send_request_t); sendmode, \ persistent) \ do { \ - mca_pml_dr_endpoint_t* endpoint = \ - (mca_pml_dr_endpoint_t*)sendreq->req_send.req_base.req_proc->proc_pml; \ + mca_bml_base_endpoint_t* endpoint = \ + sendreq->req_send.req_base.req_proc->proc_bml; \ bool do_csum = mca_pml_dr.enable_csum && \ - (endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \ + (endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \ /* increment reference counts */ \ OBJ_RETAIN(comm); \ OBJ_RETAIN(datatype); \ @@ -149,24 +149,27 @@ do { #define MCA_PML_DR_SEND_REQUEST_START(sendreq, rc) \ do { \ mca_pml_dr_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \ - mca_pml_dr_endpoint_t* endpoint = \ + mca_pml_dr_endpoint_t* pml_endpoint = \ (mca_pml_dr_endpoint_t*)sendreq->req_send.req_base.req_proc->proc_pml; \ + mca_bml_base_endpoint_t* bml_endpoint = \ + 
sendreq->req_send.req_base.req_proc->proc_bml; \ mca_pml_dr_comm_proc_t* proc = \ comm->procs + sendreq->req_send.req_base.req_peer; \ mca_bml_base_btl_t* bml_btl; \ size_t size = sendreq->req_send.req_bytes_packed; \ size_t eager_limit; \ - if(endpoint == NULL) { \ + if(pml_endpoint == NULL || bml_endpoint == NULL) { \ rc = OMPI_ERR_UNREACH; \ break; \ } \ \ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->base.btl_eager); \ + bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); \ MCA_PML_DR_VFRAG_INIT(&sendreq->req_vfrag0); \ - sendreq->req_vfrag0.vf_id = OPAL_THREAD_ADD32(&endpoint->vfrag_seq,1); \ + sendreq->req_vfrag0.vf_id = OPAL_THREAD_ADD32(&pml_endpoint->vfrag_seq,1); \ sendreq->req_vfrag0.bml_btl = bml_btl; \ sendreq->req_vfrag = &sendreq->req_vfrag0; \ - sendreq->req_endpoint = endpoint; \ + sendreq->req_endpoint = pml_endpoint; \ + assert(pml_endpoint->bml_endpoint == bml_endpoint); \ sendreq->req_proc = proc; \ \ sendreq->req_lock = 0; \ @@ -280,9 +283,9 @@ do { * Lookup/allocate a vfrag for the pending send */ -#define MCA_PML_DR_SEND_REQUEST_VFRAG_INIT(sendreq, endpoint, size, vfrag) \ +#define MCA_PML_DR_SEND_REQUEST_VFRAG_INIT(sendreq, pml_endpoint, size, vfrag) \ do { \ - size_t max_send_size = endpoint->base.btl_max_send_size - \ + size_t max_send_size = pml_endpoint->bml_endpoint->btl_max_send_size - \ sizeof(mca_pml_dr_frag_hdr_t); \ size_t div = size / max_send_size; \ \ @@ -309,7 +312,7 @@ do { else \ vfrag->vf_mask = (((uint64_t)1 << vfrag->vf_len) - (uint64_t)1); \ } \ - vfrag->vf_id = OPAL_THREAD_ADD32(&endpoint->vfrag_seq,1); \ + vfrag->vf_id = OPAL_THREAD_ADD32(&pml_endpoint->vfrag_seq,1); \ vfrag->vf_offset = sendreq->req_send_offset; \ vfrag->vf_max_send_size = max_send_size; \ vfrag->vf_send.pval = sendreq; \ @@ -396,7 +399,7 @@ do { \ do { \ mca_pml_dr_endpoint_t* endpoint = sendreq->req_endpoint; \ mca_bml_base_btl_t* bml_btl = \ - mca_bml_base_btl_array_get_next(&endpoint->base.btl_eager); \ + 
mca_bml_base_btl_array_get_next(&endpoint->bml_endpoint->btl_eager); \ mca_btl_base_descriptor_t *des_old, *des_new; \ mca_pml_dr_hdr_t *hdr; \ bool do_csum = mca_pml_dr.enable_csum && \ diff --git a/ompi/mca/pml/dr/pml_dr_vfrag.c b/ompi/mca/pml/dr/pml_dr_vfrag.c index bcf209e1a4..588ecaa36a 100644 --- a/ompi/mca/pml/dr/pml_dr_vfrag.c +++ b/ompi/mca/pml/dr/pml_dr_vfrag.c @@ -151,15 +151,15 @@ void mca_pml_dr_vfrag_reset(mca_pml_dr_vfrag_t* vfrag) sendreq->req_endpoint = (mca_pml_dr_endpoint_t*)sendreq->req_send.req_base.req_proc->proc_pml; /* make sure a path is available */ - if(mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->base.btl_eager) == 0 || - mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->base.btl_eager) == 0) { + if(mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->bml_endpoint->btl_eager) == 0 || + mca_bml_base_btl_array_get_size(&sendreq->req_endpoint->bml_endpoint->btl_eager) == 0) { opal_output(0, "%s:%d:%s: no path to peer", __FILE__, __LINE__, __func__); orte_errmgr.abort(); } if(vfrag->vf_offset == 0) { - vfrag->bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->base.btl_eager); + vfrag->bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->bml_endpoint->btl_eager); } else { - vfrag->bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->base.btl_send); + vfrag->bml_btl = mca_bml_base_btl_array_get_next(&sendreq->req_endpoint->bml_endpoint->btl_send); } opal_output(0, "%s:%d:%s: selected new BTL: %s", __FILE__, __LINE__, __func__, vfrag->bml_btl->btl->btl_component->btl_version.mca_component_name); diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 65dad10f7d..ef52106962 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -115,6 +115,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) ompi_bitmap_t reachable; struct mca_bml_base_endpoint_t ** bml_endpoints = NULL; int rc; + size_t i; if(nprocs == 0) return OMPI_SUCCESS; @@ 
-154,6 +155,12 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) mca_pml_ob1.free_list_inc, NULL); + /* we don't have any endpoint data we need to cache on the + ompi_proc_t, so set proc_pml to NULL */ + for (i = 0 ; i < nprocs ; ++i) { + procs[i]->proc_pml = NULL; + } + if ( NULL != bml_endpoints ) { free ( bml_endpoints) ; } @@ -189,7 +196,7 @@ int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose) /* iterate through all procs on communicator */ for(i=0; inum_procs; i++) { mca_pml_ob1_comm_proc_t* proc = &pml_comm->procs[i]; - mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->proc_ompi->proc_pml; + mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->proc_ompi->proc_bml; size_t n; opal_output(0, "[Rank %d]\n", i); diff --git a/ompi/mca/pml/ob1/pml_ob1_component.c b/ompi/mca/pml/ob1/pml_ob1_component.c index e69cf962a4..f866858aed 100644 --- a/ompi/mca/pml/ob1/pml_ob1_component.c +++ b/ompi/mca/pml/ob1/pml_ob1_component.c @@ -224,8 +224,7 @@ mca_pml_base_module_t* mca_pml_ob1_component_init(int* priority, if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads, - enable_mpi_threads, - OBJ_CLASS(mca_bml_base_endpoint_t))) { + enable_mpi_threads)) { return NULL; } /* As our own progress function does nothing except calling the BML diff --git a/ompi/mca/pml/ob1/pml_ob1_recvreq.c b/ompi/mca/pml/ob1/pml_ob1_recvreq.c index 3abc76fb4f..96e8ef44a7 100644 --- a/ompi/mca/pml/ob1/pml_ob1_recvreq.c +++ b/ompi/mca/pml/ob1/pml_ob1_recvreq.c @@ -170,7 +170,7 @@ static void mca_pml_ob1_recv_request_ack( mca_pml_ob1_ack_hdr_t* ack; int rc; - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml; + bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; bml_btl = mca_bml_base_btl_array_get_next(&bml_endpoint->btl_eager); if(hdr->hdr_msg_length > bytes_received) { @@ -392,7 +392,7 @@ static void mca_pml_ob1_recv_request_rget( int rc; /* lookup bml datastructures */ - bml_endpoint = 
(mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_pml; + bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_bml; bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl); if(NULL == bml_btl) { opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__); @@ -592,7 +592,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq) { if(OPAL_THREAD_ADD32(&recvreq->req_lock,1) == 1) { ompi_proc_t* proc = recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_pml; + mca_bml_base_endpoint_t* bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; mca_bml_base_btl_t* bml_btl; bool ack = false; do { diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 504ca3bb93..fcdc246d4f 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -128,7 +128,7 @@ do { do { \ mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \ mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) \ - sendreq->req_send.req_base.req_proc->proc_pml; \ + sendreq->req_send.req_base.req_proc->proc_bml; \ mca_bml_base_btl_t* bml_btl; \ size_t size = sendreq->req_send.req_bytes_packed; \ size_t eager_limit; \ diff --git a/ompi/mca/pml/pml.h b/ompi/mca/pml/pml.h index 261358b003..22fae0be19 100644 --- a/ompi/mca/pml/pml.h +++ b/ompi/mca/pml/pml.h @@ -74,6 +74,16 @@ extern "C" { typedef uint64_t mca_pml_sequence_t; +/** + * Base PML proc structure + * + * Base PML structure for caching proc information on a communicator. + * A PML should maintain an array of pointers to mca_pml_proc_t + * structures in the c_pml_procs structure of every communicator. + * Note that the mca_pml_proc_t structure can not be instantiated + * directly, so each PML *must* provide a class that inherits from + * this class and provides the necessary integration logic. 
+ */ struct mca_pml_proc_t { opal_list_item_t super; struct ompi_proc_t *proc_ompi; /**< back-pointer to ompi_proc_t */ @@ -81,6 +91,22 @@ struct mca_pml_proc_t { }; typedef struct mca_pml_proc_t mca_pml_proc_t; + +/** + * Base PML endpoint structure + * + * Base PML structure for caching endpoint information on a proc. A + * pointer to an mca_pml_endpoint_t is maintained on each ompi_proc_t, + * in the proc_pml field, to provide per-process cache information. + * The data is opaque to the active PML -- no other subsystem will + * attempt to access the information in the cache. + * + * The PML is responsible for allocation and deallocation of the + * endpoint data during pml_add_procs and pml_del_procs. + */ +struct mca_pml_endpoint_t; + + typedef enum { MCA_PML_BASE_SEND_STANDARD, MCA_PML_BASE_SEND_BUFFERED, diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index 41592ef2ca..fc1cc1a550 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -55,6 +55,7 @@ OBJ_CLASS_INSTANCE( void ompi_proc_construct(ompi_proc_t* proc) { + proc->proc_bml = NULL; proc->proc_pml = NULL; proc->proc_modex = NULL; OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t); diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h index e61aacb87c..76a76ced41 100644 --- a/ompi/proc/proc.h +++ b/ompi/proc/proc.h @@ -37,8 +37,10 @@ struct ompi_proc_t { opal_list_item_t super; /** this process' name */ orte_process_name_t proc_name; + /** BML specific proc data */ + struct mca_bml_base_endpoint_t* proc_bml; /** PML specific proc data */ - struct mca_pml_proc_t* proc_pml; + struct mca_pml_base_endpoint_t* proc_pml; /** MCA module exchange data */ opal_object_t* proc_modex; /** architecture of this process */ diff --git a/ompi/tools/ompi_info/components.cc b/ompi/tools/ompi_info/components.cc index e5334706e6..c94214eec8 100644 --- a/ompi/tools/ompi_info/components.cc +++ b/ompi/tools/ompi_info/components.cc @@ -58,6 +58,8 @@ #include "ompi/mca/rcache/base/base.h" #include "ompi/mca/btl/btl.h" #include 
"ompi/mca/btl/base/base.h" +#include "ompi/mca/mtl/mtl.h" +#include "ompi/mca/mtl/base/base.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" #include "ompi/mca/osc/osc.h" @@ -249,6 +251,9 @@ void ompi_info::open_components() mca_btl_base_open(); component_map["btl"] = &mca_btl_base_components_opened; + ompi_mtl_base_open(); + component_map["mtl"] = &ompi_mtl_base_components_opened; + mca_topo_base_open(); component_map["topo"] = &mca_topo_base_components_opened; @@ -270,6 +275,7 @@ void ompi_info::close_components() mca_topo_base_close(); // the PML has to call the base PTL close function. mca_btl_base_close(); + ompi_mtl_base_close(); mca_pml_base_close(); mca_mpool_base_close(); mca_rcache_base_close(); diff --git a/ompi/tools/ompi_info/ompi_info.cc b/ompi/tools/ompi_info/ompi_info.cc index 01fd7cd3c2..79c9c11217 100644 --- a/ompi/tools/ompi_info/ompi_info.cc +++ b/ompi/tools/ompi_info/ompi_info.cc @@ -182,8 +182,10 @@ int main(int argc, char *argv[]) ompi_info::mca_types.push_back("bml"); ompi_info::mca_types.push_back("rcache"); ompi_info::mca_types.push_back("btl"); + ompi_info::mca_types.push_back("mtl"); ompi_info::mca_types.push_back("topo"); ompi_info::mca_types.push_back("osc"); + ompi_info::mca_types.push_back("common"); ompi_info::mca_types.push_back("errmgr"); ompi_info::mca_types.push_back("gpr");