From 66dadbe1e7ac647768908813c77efda737b95b3a Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 8 Oct 2013 15:18:59 +0000 Subject: [PATCH] Per RFC, remove the udapl BTL. This commit was SVN r29400. --- ompi/mca/btl/udapl/Makefile.am | 63 - ompi/mca/btl/udapl/btl_udapl.c | 1344 ---------------- ompi/mca/btl/udapl/btl_udapl.h | 468 ------ ompi/mca/btl/udapl/btl_udapl_component.c | 1223 --------------- ompi/mca/btl/udapl/btl_udapl_eager_rdma.h | 105 -- ompi/mca/btl/udapl/btl_udapl_endpoint.c | 1708 --------------------- ompi/mca/btl/udapl/btl_udapl_endpoint.h | 218 --- ompi/mca/btl/udapl/btl_udapl_frag.c | 134 -- ompi/mca/btl/udapl/btl_udapl_frag.h | 203 --- ompi/mca/btl/udapl/btl_udapl_mca.c | 314 ---- ompi/mca/btl/udapl/btl_udapl_mca.h | 63 - ompi/mca/btl/udapl/btl_udapl_proc.c | 312 ---- ompi/mca/btl/udapl/btl_udapl_proc.h | 67 - ompi/mca/btl/udapl/configure.m4 | 62 - ompi/mca/btl/udapl/help-mpi-btl-udapl.txt | 139 -- 15 files changed, 6423 deletions(-) delete mode 100644 ompi/mca/btl/udapl/Makefile.am delete mode 100644 ompi/mca/btl/udapl/btl_udapl.c delete mode 100644 ompi/mca/btl/udapl/btl_udapl.h delete mode 100644 ompi/mca/btl/udapl/btl_udapl_component.c delete mode 100644 ompi/mca/btl/udapl/btl_udapl_eager_rdma.h delete mode 100644 ompi/mca/btl/udapl/btl_udapl_endpoint.c delete mode 100644 ompi/mca/btl/udapl/btl_udapl_endpoint.h delete mode 100644 ompi/mca/btl/udapl/btl_udapl_frag.c delete mode 100644 ompi/mca/btl/udapl/btl_udapl_frag.h delete mode 100644 ompi/mca/btl/udapl/btl_udapl_mca.c delete mode 100644 ompi/mca/btl/udapl/btl_udapl_mca.h delete mode 100644 ompi/mca/btl/udapl/btl_udapl_proc.c delete mode 100644 ompi/mca/btl/udapl/btl_udapl_proc.h delete mode 100644 ompi/mca/btl/udapl/configure.m4 delete mode 100644 ompi/mca/btl/udapl/help-mpi-btl-udapl.txt diff --git a/ompi/mca/btl/udapl/Makefile.am b/ompi/mca/btl/udapl/Makefile.am deleted file mode 100644 index 210cdff990..0000000000 --- a/ompi/mca/btl/udapl/Makefile.am +++ /dev/null @@ -1,63 +0,0 
@@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -CFLAGS = $(btl_udapl_CFLAGS) -AM_CPPFLAGS = $(btl_udapl_CPPFLAGS) -DPKGDATADIR=\"$(pkgdatadir)\" - -dist_pkgdata_DATA = \ - help-mpi-btl-udapl.txt - -udapl_sources = \ - btl_udapl.c \ - btl_udapl.h \ - btl_udapl_component.c \ - btl_udapl_eager_rdma.h \ - btl_udapl_endpoint.c \ - btl_udapl_endpoint.h \ - btl_udapl_frag.c \ - btl_udapl_frag.h \ - btl_udapl_mca.c \ - btl_udapl_mca.h \ - btl_udapl_proc.c \ - btl_udapl_proc.h - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_btl_udapl_DSO -component_noinst = -component_install = mca_btl_udapl.la -else -component_noinst = libmca_btl_udapl.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_btl_udapl_la_SOURCES = $(udapl_sources) -mca_btl_udapl_la_LIBADD = $(btl_udapl_LIBS) -mca_btl_udapl_la_LDFLAGS = -module -avoid-version $(btl_udapl_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_btl_udapl_la_SOURCES = $(udapl_sources) -libmca_btl_udapl_la_LIBADD = $(btl_udapl_LIBS) -libmca_btl_udapl_la_LDFLAGS = -module -avoid-version $(btl_udapl_LDFLAGS) diff --git a/ompi/mca/btl/udapl/btl_udapl.c b/ompi/mca/btl/udapl/btl_udapl.c deleted file mode 100644 index ff484009a7..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl.c +++ /dev/null @@ -1,1344 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Sandia National Laboratories. All rights - * reserved. - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#include "opal/class/opal_bitmap.h" -#include "opal/util/if.h" -#include "ompi/mca/btl/btl.h" - -#include "btl_udapl.h" -#include "btl_udapl_endpoint.h" -#include "btl_udapl_frag.h" -#include "btl_udapl_mca.h" -#include "btl_udapl_proc.h" -#include "opal/datatype/opal_convertor.h" -#include "ompi/mca/mpool/base/base.h" -#include "ompi/mca/mpool/grdma/mpool_grdma.h" -#include "ompi/mca/btl/base/btl_base_error.h" -#include "ompi/proc/proc.h" - -static int udapl_reg_mr(void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg); -static int udapl_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg); -static int mca_btl_udapl_set_peer_parameters( - struct mca_btl_udapl_module_t* udapl_btl, - size_t nprocs); -static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl); - -mca_btl_udapl_module_t mca_btl_udapl_module = { - { - &mca_btl_udapl_component.super, - 0, /* max size of first fragment */ - 0, /* min send fragment size */ - 0, /* max send fragment size */ - 0, /* btl_rdma_pipeline_send_length */ - 0, /* btl_rdma_pipeline_frag_size */ - 0, /* btl_min_rdma_pipeline_size */ - 0, /* exclusivity */ - 0, /* latency */ - 0, /* bandwidth */ - MCA_BTL_FLAGS_SEND, - 0, /* segment size */ - mca_btl_udapl_add_procs, - mca_btl_udapl_del_procs, - NULL, - mca_btl_udapl_finalize, - mca_btl_udapl_alloc, - mca_btl_udapl_free, - mca_btl_udapl_prepare_src, - mca_btl_udapl_prepare_dst, - mca_btl_udapl_send, - NULL, /* send immediate */ - mca_btl_udapl_put, - NULL, /* get */ - mca_btl_base_dump, - NULL, /* mpool */ - NULL, /* register error cb */ - mca_btl_udapl_ft_event - } -}; - -static int udapl_reg_mr(void *reg_data, void *base, size_t size, - mca_mpool_base_registration_t *reg) -{ - mca_btl_udapl_module_t *btl = (mca_btl_udapl_module_t*)reg_data; - mca_btl_udapl_reg_t *udapl_reg = (mca_btl_udapl_reg_t*)reg; - 
DAT_REGION_DESCRIPTION region; - DAT_VLEN dat_size; - DAT_VADDR dat_addr; - int rc; - DAT_MEM_TYPE lmr_mem_type = DAT_MEM_TYPE_VIRTUAL; - - region.for_va = base; - udapl_reg->lmr_triplet.virtual_address = (DAT_VADDR)(uintptr_t)base; - udapl_reg->lmr_triplet.segment_length = size; - udapl_reg->lmr = NULL; - -#if HAVE_DAT_MEM_TYPE_SO_VIRTUAL - if (reg->flags & MCA_MPOOL_FLAGS_SO_MEM) { - lmr_mem_type = DAT_MEM_TYPE_SO_VIRTUAL; - } -#endif - - rc = dat_lmr_create(btl->udapl_ia, lmr_mem_type, region, size, - btl->udapl_pz, DAT_MEM_PRIV_ALL_FLAG, &udapl_reg->lmr, - &udapl_reg->lmr_triplet.lmr_context, &udapl_reg->rmr_context, - &dat_size, &dat_addr); - - if(rc != DAT_SUCCESS) { - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "dat_lmr_create DAT_INSUFFICIENT_RESOURCES", true)); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - return OMPI_SUCCESS; -} - -static int udapl_dereg_mr(void *reg_data, mca_mpool_base_registration_t *reg) -{ - mca_btl_udapl_reg_t *udapl_reg = (mca_btl_udapl_reg_t*)reg; - int rc; - - if(udapl_reg->lmr != NULL) { - rc = dat_lmr_free(udapl_reg->lmr); - if(rc != DAT_SUCCESS) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_lmr_free", - major, minor)); - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - -/** - * Initialize module module resources. 
- */ - -int -mca_btl_udapl_init(DAT_NAME_PTR ia_name, mca_btl_udapl_module_t* btl) -{ - mca_mpool_base_resources_t res; - DAT_CONN_QUAL port; - DAT_RETURN rc; - - /* open the uDAPL interface */ - btl->udapl_evd_async = DAT_HANDLE_NULL; - rc = dat_ia_open(ia_name, btl->udapl_async_evd_qlen, - &btl->udapl_evd_async, &btl->udapl_ia); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - -#if defined(__SVR4) && defined(__sun) - if (strcmp(major, "DAT_INVALID_PARAMETER") == 0 && - strcmp(minor, "DAT_INVALID_RO_COOKIE") == 0) { - /* Some platforms that Solaris runs on implement the PCI - * standard for relaxed ordering(RO). Using RDMA with - * polling on a memory location as the uDAPL (and openib - * by the way) BTL does for short messages with - * relaxed ordering could potentially produce silent data - * corruption. For this reason we need to take extra - * steps and this is accomplished by setting - * "ro_aware_system = 1" and handling as required. - * - * The uDAPL standard does not provide an interface to - * inform users of this scenario so Sun has implemented the - * following: If a platform supports relaxed ordering - * when the interface name is passed into the - * dat_ia_open() call, the call will return - * DAT_INVALID_PARAMETER and DAT_INVALID_RO_COOKIE. - * DAT_INVALID_RO_COOKIE is not part of the uDAPL standard - * at this time. The only way to open this interface is - * to prefix the following cookie "RO_AWARE_" to the ia - * name that was retreived from the dat registry. - * - * Example: ia_name = "ib0", new expected name will be - * "RO_AWARE_ib0". - * - * Here, since our first ia open attempt failed in the - * standard way, add the cookie and try to open again. 
- */ - DAT_NAME_PTR ro_ia_name; - - /* prefix relaxed order cookie to ia_name */ - asprintf(&ro_ia_name, "RO_AWARE_%s", ia_name); - if (NULL == ro_ia_name) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* because this is not standard inform user in some way */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_INFORM, - ("help-mpi-btl-udapl.txt", "relaxed order support", - true, ia_name, ro_ia_name)); - - /* try and open again */ - btl->udapl_evd_async = DAT_HANDLE_NULL; - rc = dat_ia_open(ro_ia_name, btl->udapl_async_evd_qlen, - &btl->udapl_evd_async, &btl->udapl_ia); - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - - if (DAT_SUCCESS == rc) { - mca_btl_udapl_component.ro_aware_system = 1; - free(ro_ia_name); - } else { - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", - "dat_ia_open fail RO", true, ro_ia_name, - major, minor, ia_name)); - - free(ro_ia_name); - return OMPI_ERROR; - } - } else { -#endif - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "dat_ia_open fail", true, ia_name, major, minor)); - - return OMPI_ERROR; -#if defined(__SVR4) && defined(__sun) - } -#endif - } - - /* create a protection zone */ - rc = dat_pz_create(btl->udapl_ia, &btl->udapl_pz); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_pz_create", - major, minor)); - goto failure; - } - - /* query to get address information */ - rc = dat_ia_query(btl->udapl_ia, &btl->udapl_evd_async, - DAT_IA_ALL, &(btl->udapl_ia_attr), 0, NULL); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ia_query", - major, minor)); - goto failure; - } - - memcpy(&btl->udapl_addr.addr, (btl->udapl_ia_attr).ia_address_ptr, - sizeof(DAT_SOCK_ADDR)); - - /* determine netmask */ - mca_btl_udapl_assign_netmask(btl); - - /* check evd qlen against 
adapter max */ - if (btl->udapl_dto_evd_qlen > (btl->udapl_ia_attr).max_evd_qlen) { - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "evd_qlen adapter max", - true, - "btl_udapl_dto_evd_qlen", - btl->udapl_dto_evd_qlen, - (btl->udapl_ia_attr).max_evd_qlen)); - btl->udapl_dto_evd_qlen = btl->udapl_ia_attr.max_evd_qlen; - } - if (btl->udapl_conn_evd_qlen > (btl->udapl_ia_attr).max_evd_qlen) { - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "evd_qlen adapter max", - true, - "btl_udapl_conn_evd_qlen", - btl->udapl_conn_evd_qlen, - (btl->udapl_ia_attr).max_evd_qlen)); - btl->udapl_conn_evd_qlen = btl->udapl_ia_attr.max_evd_qlen; - } - - /* set up evd's */ - rc = dat_evd_create(btl->udapl_ia, - btl->udapl_dto_evd_qlen, DAT_HANDLE_NULL, - DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG, &btl->udapl_evd_dto); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_evd_create (dto)", - major, minor)); - goto failure; - } - - rc = dat_evd_create(btl->udapl_ia, - btl->udapl_conn_evd_qlen, DAT_HANDLE_NULL, - DAT_EVD_CR_FLAG | DAT_EVD_CONNECTION_FLAG, &btl->udapl_evd_conn); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_evd_create (conn)", - major, minor)); - goto failure; - } - - /* create our public service point */ - rc = dat_psp_create_any(btl->udapl_ia, &port, btl->udapl_evd_conn, - DAT_PSP_CONSUMER_FLAG, &btl->udapl_psp); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_psp_create_any", - major, minor)); - goto failure; - } - - /* establish endpoint parameters */ - rc = mca_btl_udapl_endpoint_get_params(btl, &(btl->udapl_ep_param)); - if(OMPI_SUCCESS != rc) { - /* by not erroring out here we can try 
to continue with - * the default endpoint parameter values - */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "use default endpoint params", - true)); - } - - /* Save the port with the address information */ - /* TODO - since we're doing the hack below, do we need our own port? */ - btl->udapl_addr.port = port; - - /* Using dat_ep_query to obtain the remote port would be ideal but - * since the current udapl implementations don't seem to support - * this we store the port in udapl_addr and explictly exchange the - * information later. - */ - ((struct sockaddr_in*)&btl->udapl_addr.addr)->sin_port = htons(port); - - /* initialize the memory pool */ - res.pool_name = "udapl"; - res.reg_data = btl; - res.sizeof_reg = sizeof(mca_btl_udapl_reg_t); - res.register_mem = udapl_reg_mr; - res.deregister_mem = udapl_dereg_mr; - btl->super.btl_mpool = mca_mpool_base_module_create( - mca_btl_udapl_component.udapl_mpool_name, &btl->super, &res); - if (NULL == btl->super.btl_mpool) { - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM, - ("WARNING: Failed to create mpool.")); - goto failure; - } - - /* initialize objects */ - OBJ_CONSTRUCT(&btl->udapl_frag_eager, ompi_free_list_t); - OBJ_CONSTRUCT(&btl->udapl_frag_eager_recv, ompi_free_list_t); - OBJ_CONSTRUCT(&btl->udapl_frag_max, ompi_free_list_t); - OBJ_CONSTRUCT(&btl->udapl_frag_max_recv, ompi_free_list_t); - OBJ_CONSTRUCT(&btl->udapl_frag_user, ompi_free_list_t); - OBJ_CONSTRUCT(&btl->udapl_frag_control, ompi_free_list_t); - OBJ_CONSTRUCT(&btl->udapl_lock, opal_mutex_t); - - /* check buffer alignment against dat library */ - if (mca_btl_udapl_component.udapl_buffer_alignment != - DAT_OPTIMAL_ALIGNMENT) { - - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "optimal buffer alignment mismatch", - true, - DAT_OPTIMAL_ALIGNMENT, - mca_btl_udapl_component.udapl_buffer_alignment, - DAT_OPTIMAL_ALIGNMENT)); - } - - /* initialize free lists */ - ompi_free_list_init_ex_new(&btl->udapl_frag_eager, 
- sizeof(mca_btl_udapl_frag_eager_t) + - mca_btl_udapl_component.udapl_eager_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - OBJ_CLASS(mca_btl_udapl_frag_eager_t), - mca_btl_udapl_component.udapl_eager_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - mca_btl_udapl_component.udapl_free_list_num, - mca_btl_udapl_component.udapl_free_list_max, - mca_btl_udapl_component.udapl_free_list_inc, - btl->super.btl_mpool, - NULL, - NULL); - - ompi_free_list_init_ex_new(&btl->udapl_frag_eager_recv, - sizeof(mca_btl_udapl_frag_eager_t) + - mca_btl_udapl_component.udapl_eager_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - OBJ_CLASS(mca_btl_udapl_frag_eager_t), - mca_btl_udapl_component.udapl_eager_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - mca_btl_udapl_component.udapl_free_list_num, - mca_btl_udapl_component.udapl_free_list_max, - mca_btl_udapl_component.udapl_free_list_inc, - btl->super.btl_mpool, - NULL, - NULL); - - ompi_free_list_init_ex_new(&btl->udapl_frag_max, - sizeof(mca_btl_udapl_frag_max_t) + - mca_btl_udapl_component.udapl_max_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - OBJ_CLASS(mca_btl_udapl_frag_max_t), - mca_btl_udapl_component.udapl_max_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - mca_btl_udapl_component.udapl_free_list_num, - mca_btl_udapl_component.udapl_free_list_max, - mca_btl_udapl_component.udapl_free_list_inc, - btl->super.btl_mpool, - NULL, - NULL); - - ompi_free_list_init_ex_new(&btl->udapl_frag_max_recv, - sizeof(mca_btl_udapl_frag_max_t) + - mca_btl_udapl_component.udapl_max_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - OBJ_CLASS(mca_btl_udapl_frag_max_t), - mca_btl_udapl_component.udapl_max_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - mca_btl_udapl_component.udapl_free_list_num, - mca_btl_udapl_component.udapl_free_list_max, - mca_btl_udapl_component.udapl_free_list_inc, - btl->super.btl_mpool, - NULL, - NULL); - - 
ompi_free_list_init_ex_new(&btl->udapl_frag_user, - sizeof(mca_btl_udapl_frag_user_t), - mca_btl_udapl_component.udapl_buffer_alignment, - OBJ_CLASS(mca_btl_udapl_frag_user_t), - 0,0, - mca_btl_udapl_component.udapl_free_list_num, - mca_btl_udapl_component.udapl_free_list_max, - mca_btl_udapl_component.udapl_free_list_inc, - NULL, - NULL, - NULL); - - ompi_free_list_init_ex_new(&btl->udapl_frag_control, - sizeof(mca_btl_udapl_frag_eager_t) + - mca_btl_udapl_component.udapl_eager_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - OBJ_CLASS(mca_btl_udapl_frag_eager_t), - mca_btl_udapl_component.udapl_eager_frag_size, - mca_btl_udapl_component.udapl_buffer_alignment, - mca_btl_udapl_component.udapl_free_list_num, - -1, - mca_btl_udapl_component.udapl_free_list_inc, - btl->super.btl_mpool, - NULL, - NULL); - - /* initialize eager rdma buffer info */ - btl->udapl_eager_rdma_endpoints = OBJ_NEW(opal_pointer_array_t); - opal_pointer_array_init(btl->udapl_eager_rdma_endpoints, - mca_btl_udapl_component.udapl_max_eager_rdma_peers, - mca_btl_udapl_component.udapl_max_eager_rdma_peers, - 0); - btl->udapl_eager_rdma_endpoint_count = 0; - OBJ_CONSTRUCT(&btl->udapl_eager_rdma_lock, opal_mutex_t); - - /* initialize miscellaneous variables */ - btl->udapl_async_events = 0; - btl->udapl_connect_inprogress = 0; - btl->udapl_num_peers = 0; - - /* TODO - Set up SRQ when it is supported */ - return OMPI_SUCCESS; - -failure: - dat_ia_close(btl->udapl_ia, DAT_CLOSE_ABRUPT_FLAG); - return OMPI_ERROR; -} - -/* - * Cleanup/release module resources. - */ - -int mca_btl_udapl_finalize(struct mca_btl_base_module_t* base_btl) -{ - mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) base_btl; - int32_t i; - - /* - * Cleaning up the endpoints here because mca_btl_udapl_del_procs - * is never called by upper layers. - * Note: this is only looking at those endpoints which are available - * off of the btl module rdma list. 
- */ - for (i=0; i < udapl_btl->udapl_eager_rdma_endpoint_count; i++) { - mca_btl_udapl_endpoint_t* endpoint = - opal_pointer_array_get_item(udapl_btl->udapl_eager_rdma_endpoints, - i); - - OBJ_DESTRUCT(endpoint); - } - - /* release uDAPL resources */ - dat_evd_free(udapl_btl->udapl_evd_dto); - dat_evd_free(udapl_btl->udapl_evd_conn); - dat_pz_free(udapl_btl->udapl_pz); - dat_ia_close(udapl_btl->udapl_ia, DAT_CLOSE_GRACEFUL_FLAG); - - /* destroy objects */ - OBJ_DESTRUCT(&udapl_btl->udapl_lock); - OBJ_DESTRUCT(&udapl_btl->udapl_frag_eager); - OBJ_DESTRUCT(&udapl_btl->udapl_frag_eager_recv); - OBJ_DESTRUCT(&udapl_btl->udapl_frag_max); - OBJ_DESTRUCT(&udapl_btl->udapl_frag_max_recv); - OBJ_DESTRUCT(&udapl_btl->udapl_frag_user); - OBJ_DESTRUCT(&udapl_btl->udapl_frag_control); - OBJ_DESTRUCT(&udapl_btl->udapl_eager_rdma_lock); - - /* destroy mpool */ - if (OMPI_SUCCESS != - mca_mpool_base_module_destroy(udapl_btl->super.btl_mpool)) { - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM, - ("WARNING: Failed to release mpool")); - return OMPI_ERROR; - } - - free(udapl_btl); - return OMPI_SUCCESS; -} - - -/* - * Adjust parameters that are dependent on the number of peers. - * - * @param udapl_btl (IN) BTL module - * @param nprocs (IN) number of processes handed into - * mca_btl_udapl_add_procs() - * @return OMPI_SUCCESS or error status on failure - */ - -static int mca_btl_udapl_set_peer_parameters( - struct mca_btl_udapl_module_t* udapl_btl, - size_t nprocs) -{ - int rc = OMPI_SUCCESS; - DAT_RETURN dat_rc = DAT_SUCCESS; - uint potential_udapl_timeout; - int first_time_sizing = (udapl_btl->udapl_num_peers == 0 ? 
1 : 0); - DAT_EVD_PARAM evd_param; - - /* nprocs includes self so subtract 1 */ - udapl_btl->udapl_num_peers += nprocs - 1; - - /* resize dto_evd_qlen if not already at its max */ - if (udapl_btl->udapl_dto_evd_qlen != - udapl_btl->udapl_ia_attr.max_evd_qlen) { - - int potential_dto_evd_qlen; - int max_connection_dto_events; - int eager_connection_dto_events; - - /* eager connection dto events already factored into - * max_recv/request_dtos but need to calculate max connection dtos; - * see mca_btl_udapl_get_params() for max_recv/request_dtos - */ - eager_connection_dto_events = udapl_btl->udapl_max_recv_dtos + - udapl_btl->udapl_max_request_dtos; - max_connection_dto_events = mca_btl_udapl_component.udapl_num_recvs + - mca_btl_udapl_component.udapl_num_sends + - (mca_btl_udapl_component.udapl_num_recvs / - mca_btl_udapl_component.udapl_sr_win) + 1; - potential_dto_evd_qlen = udapl_btl->udapl_num_peers * - (eager_connection_dto_events + max_connection_dto_events); - - /* here we use what the library calculates as the - * potential_dto_evd_qlen unless the user has set - */ - if (first_time_sizing) { - if (udapl_btl->udapl_dto_evd_qlen < potential_dto_evd_qlen) { - if (MCA_BTL_UDAPL_DTO_EVD_QLEN_DEFAULT != - udapl_btl->udapl_dto_evd_qlen) { - - /* user modified so warn */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", - "evd_qlen too low", - true, - "btl_udapl_dto_evd_qlen", - udapl_btl->udapl_dto_evd_qlen, - "btl_udapl_dto_evd_qlen", - potential_dto_evd_qlen)); - } else { - udapl_btl->udapl_dto_evd_qlen = potential_dto_evd_qlen; - } - } - } else { - /* since this is not the first time attempting to resize the - * evd queue length just use the potential value; this may not - * be the best solution - */ - udapl_btl->udapl_dto_evd_qlen = potential_dto_evd_qlen; - } - - udapl_btl->udapl_dto_evd_qlen = ((udapl_btl->udapl_dto_evd_qlen > - udapl_btl->udapl_ia_attr.max_evd_qlen) ? 
- udapl_btl->udapl_ia_attr.max_evd_qlen : - udapl_btl->udapl_dto_evd_qlen); - - /* OFED stack does not return DAT_INVALID_STATE when - * the new qlen is less than current value so here we find - * current value and if greater than what we intend to set - * it to skip the resize. - */ - dat_rc = dat_evd_query(udapl_btl->udapl_evd_dto, - DAT_EVD_FIELD_EVD_QLEN, &evd_param); - if(DAT_SUCCESS != dat_rc) { - char* major; - char* minor; - - dat_strerror(dat_rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_evd_query", - major, minor)); - } - - if (udapl_btl->udapl_dto_evd_qlen > evd_param.evd_qlen) { - /* resize dto event dispatcher queue length */ - dat_rc = dat_evd_resize(udapl_btl->udapl_evd_dto, - udapl_btl->udapl_dto_evd_qlen); - if(DAT_SUCCESS != dat_rc) { - char* major; - char* minor; - - dat_strerror(dat_rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_evd_resize", - major, minor)); - rc = OMPI_ERR_OUT_OF_RESOURCE; - } - } - } - - /* resize connection evd qlen */ - if (udapl_btl->udapl_conn_evd_qlen != - udapl_btl->udapl_ia_attr.max_evd_qlen) { - - int potential_conn_evd_qlen = 2 * udapl_btl->udapl_num_peers; - - if (first_time_sizing) { - if (udapl_btl->udapl_conn_evd_qlen < potential_conn_evd_qlen) { - if (MCA_BTL_UDAPL_CONN_EVD_QLEN_DEFAULT != - udapl_btl->udapl_conn_evd_qlen) { - - /* user modified so warn */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", - "evd_qlen too low", - true, - "btl_udapl_conn_evd_qlen", - udapl_btl->udapl_conn_evd_qlen, - "btl_udapl_conn_evd_qlen", - potential_conn_evd_qlen)); - } else { - udapl_btl->udapl_conn_evd_qlen = potential_conn_evd_qlen; - } - } - } else { - /* since this is not the first time attempting to resize the - * evd queue length just use the potential value; this may not - * be the best solution - */ - udapl_btl->udapl_conn_evd_qlen = potential_conn_evd_qlen; - } - - udapl_btl->udapl_conn_evd_qlen = 
((udapl_btl->udapl_conn_evd_qlen > - udapl_btl->udapl_ia_attr.max_evd_qlen) ? - udapl_btl->udapl_ia_attr.max_evd_qlen : - udapl_btl->udapl_conn_evd_qlen); - - /* OFED stack does not return DAT_INVALID_STATE when - * the new qlen is less than current value so here we find - * current value and if greater than what we intend to set - * it to skip the resize. - */ - dat_rc = dat_evd_query(udapl_btl->udapl_evd_conn, - DAT_EVD_FIELD_EVD_QLEN, &evd_param); - if(DAT_SUCCESS != dat_rc) { - char* major; - char* minor; - - dat_strerror(dat_rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_evd_query", - major, minor)); - } - - if (udapl_btl->udapl_conn_evd_qlen > evd_param.evd_qlen) { - /* resize conn evd queue length */ - dat_rc = dat_evd_resize(udapl_btl->udapl_evd_conn, - udapl_btl->udapl_conn_evd_qlen); - if(DAT_SUCCESS != dat_rc) { - char* major; - char* minor; - - dat_strerror(dat_rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_evd_resize", - major, minor)); - rc = OMPI_ERR_OUT_OF_RESOURCE; - } - } - } - - /* adjust connection timeout value, calculated in microseconds */ - potential_udapl_timeout = MCA_BTL_UDAPL_CONN_TIMEOUT_INC * - udapl_btl->udapl_num_peers; - - if (mca_btl_udapl_component.udapl_timeout < - potential_udapl_timeout) { - - if (MCA_BTL_UDAPL_CONN_TIMEOUT_DEFAULT != - mca_btl_udapl_component.udapl_timeout) { - - /* user modified so warn */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", - "connection timeout low", - true, - "btl_udapl_timeout", - mca_btl_udapl_component.udapl_timeout, - "btl_udapl_timeout", - potential_udapl_timeout)); - } else { - mca_btl_udapl_component.udapl_timeout = - potential_udapl_timeout; - } - } - mca_btl_udapl_component.udapl_timeout = - ((mca_btl_udapl_component.udapl_timeout > - MCA_BTL_UDAPL_CONN_TIMEOUT_MAX) ? 
- MCA_BTL_UDAPL_CONN_TIMEOUT_MAX : - mca_btl_udapl_component.udapl_timeout); - - return rc; -} - -/* - * Find and assign system netmask for the address of the uDAPL BTL - * module, but only if udapl_if_mask has not been set by the "--mca - * btl_udapl_if_mask" parameter. This routine will either find - * the system netmask or set the value to 0. - * - * @param udapl_btl (IN) BTL module - * - * @return OMPI_SUCCESS or OMPI_ERROR - */ -static int mca_btl_udapl_assign_netmask(mca_btl_udapl_module_t* udapl_btl) -{ - struct sockaddr *saddr; - struct sockaddr_in *btl_addr; - char btl_addr_string[INET_ADDRSTRLEN]; - char btl_ifname[INET_ADDRSTRLEN]; - - /* Setting if_mask to 0 informs future steps to assume all - * addresses are reachable. - */ - udapl_btl->udapl_if_mask = 0; - - if (mca_btl_udapl_component.udapl_compare_subnet) { - /* go get system netmask value */ - - /* use generic address to find address family */ - saddr = (struct sockaddr *)&(udapl_btl->udapl_addr.addr); - - if (saddr->sa_family == AF_INET) { - - btl_addr = (struct sockaddr_in *)saddr; - - /* - * Retrieve the netmask of the udapl btl address. To - * accomplish this requires 4 steps and the use of an opal - * utility. This same utility is used by the tcp oob. - * Steps: - * 1. Get string value of known udapl btl module address. - * 2. Use string value to find the interface name of address. - * 3. Use interface name to find its index. - * 4. From the index get the netmask. 
- */ - - /* retrieve string value of udapl btl address */ - inet_ntop(AF_INET, (void *) &btl_addr->sin_addr, - btl_addr_string, INET_ADDRSTRLEN); - - /* use address string to retrieve associated interface name */ - if (OPAL_SUCCESS != - opal_ifaddrtoname(btl_addr_string, - btl_ifname, INET_ADDRSTRLEN)) { - - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", "interface not found", - true, ompi_process_info.nodename, btl_addr_string)); - - return OMPI_ERROR; - } - - /* use interface name to retrieve index; then - * use index to retrieve udapl btl address netmask - */ - if (OPAL_SUCCESS != - opal_ifindextomask(opal_ifnametoindex(btl_ifname), - &(udapl_btl->udapl_if_mask), sizeof(udapl_btl->udapl_if_mask))) { - - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", "netmask not found", - true, ompi_process_info.nodename, btl_addr_string)); - - return OMPI_ERROR; - } - - /* report if_mask used by address */ - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM, - ("uDAPL BTL address %s : if_mask = %d", - btl_addr_string, udapl_btl->udapl_if_mask)); - - } else { - /* current uDAPL BTL does not support IPv6 */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", "IPv4 only", - true, ompi_process_info.nodename)); - - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - -/* - * - */ - -int mca_btl_udapl_add_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t **ompi_procs, - struct mca_btl_base_endpoint_t** peers, - opal_bitmap_t* reachable) -{ - mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*)btl; - int i, rc; - - for(i = 0; i < (int) nprocs; i++) { - - struct ompi_proc_t* ompi_proc = ompi_procs[i]; - mca_btl_udapl_proc_t* udapl_proc; - mca_btl_base_endpoint_t* udapl_endpoint; - - if(ompi_proc == ompi_proc_local()) - continue; - - if(NULL == (udapl_proc = mca_btl_udapl_proc_create(ompi_proc))) { - continue; - } - - OPAL_THREAD_LOCK(&udapl_proc->proc_lock); - - /* The btl_proc 
datastructure is shared by all uDAPL BTL - * instances that are trying to reach this destination. - * Cache the peer instance on the btl_proc. - */ - udapl_endpoint = OBJ_NEW(mca_btl_udapl_endpoint_t); - if(NULL == udapl_endpoint) { - OPAL_THREAD_UNLOCK(&udapl_proc->proc_lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - udapl_endpoint->endpoint_btl = udapl_btl; - rc = mca_btl_udapl_proc_insert(udapl_proc, udapl_endpoint); - if(rc != OMPI_SUCCESS) { - OBJ_RELEASE(udapl_endpoint); - OPAL_THREAD_UNLOCK(&udapl_proc->proc_lock); - continue; - } - - opal_bitmap_set_bit(reachable, i); - OPAL_THREAD_UNLOCK(&udapl_proc->proc_lock); - peers[i] = udapl_endpoint; - } - - /* resize based on number of processes */ - if (OMPI_SUCCESS != - mca_btl_udapl_set_peer_parameters(udapl_btl, nprocs)) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - return OMPI_SUCCESS; -} - - -int mca_btl_udapl_del_procs(struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_btl_base_endpoint_t ** peers) -{ - /* TODO */ - return OMPI_SUCCESS; -} - - -/** - * Allocate a segment. - * - * @param btl (IN) BTL module - * @param size (IN) Request segment size. - */ - -mca_btl_base_descriptor_t* mca_btl_udapl_alloc( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - uint8_t order, - size_t size, - uint32_t flags) -{ - mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) btl; - mca_btl_udapl_frag_t* frag; - int pad = 0; - - /* compute pad as needed */ - MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad, - (size + sizeof(mca_btl_udapl_footer_t))); - - if((size + pad) <= btl->btl_eager_limit) { - MCA_BTL_UDAPL_FRAG_ALLOC_EAGER(udapl_btl, frag); - } else if(size <= btl->btl_max_send_size) { - MCA_BTL_UDAPL_FRAG_ALLOC_MAX(udapl_btl, frag); - } else { - return NULL; - } - - if (NULL == frag) { - return NULL; - } - - frag->segment.base.seg_len = size; - - /* Set up the LMR triplet from the frag segment. 
- * Note: The triplet.segment_len is set to what is required for - * actually sending the fragment, if later it is determined - * that rdma can be used to transfer the fragment the - * triplet.segment_len will have to change. - */ - frag->triplet.virtual_address = - (DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval; - frag->triplet.segment_length = - frag->segment.base.seg_len + sizeof(mca_btl_udapl_footer_t); - assert(frag->triplet.lmr_context == - frag->registration->lmr_triplet.lmr_context); - - frag->btl = udapl_btl; - frag->base.des_src = &frag->segment; - frag->base.des_src_cnt = 1; - frag->base.des_dst = NULL; - frag->base.des_dst_cnt = 0; - frag->base.des_flags = flags; - frag->base.order = MCA_BTL_NO_ORDER; - return &frag->base; -} - - -/** - * Return a segment - */ - -int mca_btl_udapl_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des) -{ - mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)des; - - if(0 == frag->size) { - if (NULL != frag->registration) { - btl->btl_mpool->mpool_deregister(btl->btl_mpool, - &(frag->registration->base)); - frag->registration = NULL; - } - MCA_BTL_UDAPL_FRAG_RETURN_USER(btl, frag); - } else if(frag->size == mca_btl_udapl_component.udapl_eager_frag_size) { - MCA_BTL_UDAPL_FRAG_RETURN_EAGER(btl, frag); - } else if(frag->size == mca_btl_udapl_component.udapl_max_frag_size) { - MCA_BTL_UDAPL_FRAG_RETURN_MAX(btl, frag); - } else { - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE, - ("mca_btl_udapl_free: invalid descriptor\n")); - return OMPI_ERR_BAD_PARAM; - } - return OMPI_SUCCESS; -} - -/** - * Pack data and return a descriptor that can be - * used for send/put. 
- * - * @param btl (IN) BTL module - * @param peer (IN) BTL peer addressing - */ -mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - uint8_t order, - size_t reserve, - size_t* size, - uint32_t flags -) -{ - mca_btl_udapl_frag_t* frag = NULL; - struct iovec iov; - uint32_t iov_count = 1; - size_t max_data = *size; - int rc; - int pad = 0; - - /* compute pad as needed */ - MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad, - (max_data + reserve + sizeof(mca_btl_udapl_footer_t))); - - if(opal_convertor_need_buffers(convertor) == false && 0 == reserve) { - if(registration != NULL || max_data > btl->btl_max_send_size) { - - MCA_BTL_UDAPL_FRAG_ALLOC_USER(btl, frag); - if(NULL == frag){ - return NULL; - } - - iov.iov_len = max_data; - iov.iov_base = NULL; - - opal_convertor_pack(convertor, &iov, - &iov_count, &max_data ); - - *size = max_data; - - if(NULL == registration) { - rc = btl->btl_mpool->mpool_register(btl->btl_mpool, iov.iov_base, - max_data, 0, - ®istration); - - if(rc != OMPI_SUCCESS) { - MCA_BTL_UDAPL_FRAG_RETURN_USER(btl,frag); - return NULL; - } - /* keep track of the registration we did */ - frag->registration = (mca_btl_udapl_reg_t*)registration; - } - - frag->segment.base.seg_len = max_data; - frag->segment.base.seg_addr.pval = iov.iov_base; - frag->triplet.segment_length = max_data; - frag->triplet.virtual_address = (DAT_VADDR)(uintptr_t)iov.iov_base; - frag->triplet.lmr_context = - ((mca_btl_udapl_reg_t*)registration)->lmr_triplet.lmr_context; - - /* initialize base descriptor */ - frag->base.des_src = &frag->segment; - frag->base.des_src_cnt = 1; - frag->base.des_dst = NULL; - frag->base.des_dst_cnt = 0; - frag->base.des_flags = flags; - frag->base.order = MCA_BTL_NO_ORDER; - return &frag->base; - } - } - - if(max_data + pad + reserve <= btl->btl_eager_limit) { - /* the data is small 
enough to fit in the eager frag and - * memory is not prepinned */ - MCA_BTL_UDAPL_FRAG_ALLOC_EAGER(btl, frag); - } - - if(NULL == frag) { - /* the data doesn't fit into eager frag or eager frag is - * not available */ - MCA_BTL_UDAPL_FRAG_ALLOC_MAX(btl, frag); - if(NULL == frag) { - return NULL; - } - if(max_data + reserve > btl->btl_max_send_size) { - max_data = btl->btl_max_send_size - reserve; - } - } - - iov.iov_len = max_data; - iov.iov_base = (char *) frag->segment.base.seg_addr.pval + reserve; - - rc = opal_convertor_pack(convertor, - &iov, &iov_count, &max_data ); - if(rc < 0) { - MCA_BTL_UDAPL_FRAG_RETURN_MAX(btl, frag); - return NULL; - } - - *size = max_data; - - /* setup lengths and addresses to send out data */ - frag->segment.base.seg_len = max_data + reserve; - frag->triplet.segment_length = - max_data + reserve + sizeof(mca_btl_udapl_footer_t); - frag->triplet.virtual_address = - (DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval; - - /* initialize base descriptor */ - frag->base.des_src = &frag->segment; - frag->base.des_src_cnt = 1; - frag->base.des_dst = NULL; - frag->base.des_dst_cnt = 0; - frag->base.des_flags = flags; - frag->base.order = MCA_BTL_NO_ORDER; - return &frag->base; -} - - -/** - * Prepare a descriptor for send/rdma using the supplied - * convertor. If the convertor references data that is contiguous, - * the descriptor may simply point to the user buffer. Otherwise, - * this routine is responsible for allocating buffer space and - * packing if required. 
- * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL peer addressing - * @param convertor (IN) Data type convertor - * @param reserve (IN) Additional bytes requested by upper layer to precede user data - * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) - */ -mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_mpool_base_registration_t* registration, - struct opal_convertor_t* convertor, - uint8_t order, - size_t reserve, - size_t* size, - uint32_t flags) -{ - mca_btl_udapl_frag_t* frag; - int rc; - - MCA_BTL_UDAPL_FRAG_ALLOC_USER(btl, frag); - if(NULL == frag) { - return NULL; - } - - frag->segment.base.seg_len = *size; - opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.base.seg_addr.pval) ); - - if(NULL == registration) { - /* didn't get a memory registration passed in, so must - * register the region now - */ - rc = btl->btl_mpool->mpool_register(btl->btl_mpool, - frag->segment.base.seg_addr.pval, - frag->segment.base.seg_len, - 0, - ®istration); - if(OMPI_SUCCESS != rc || NULL == registration) { - MCA_BTL_UDAPL_FRAG_RETURN_USER(btl,frag); - return NULL; - } - frag->registration = (mca_btl_udapl_reg_t*)registration; - } - - frag->base.des_src = NULL; - frag->base.des_src_cnt = 0; - frag->base.des_dst = &frag->segment; - frag->base.des_dst_cnt = 1; - frag->base.des_flags = flags; - - frag->segment.context = ((mca_btl_udapl_reg_t*)registration)->rmr_context; - - frag->base.order = MCA_BTL_NO_ORDER; - - return &frag->base; -} - -/** - * Initiate an asynchronous send. - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transferred - * @param tag (IN) The tag value used to notify the peer. 
- */ - -int mca_btl_udapl_send( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* des, - mca_btl_base_tag_t tag) - -{ - mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)des; - - frag->endpoint = endpoint; - frag->ftr = (mca_btl_udapl_footer_t *) - ((char *)frag->segment.base.seg_addr.pval + frag->segment.base.seg_len); - frag->ftr->tag = tag; - frag->type = MCA_BTL_UDAPL_SEND; - - /* TODO - will inlining this give worthwhile performance? */ - return mca_btl_udapl_endpoint_send(endpoint, frag); -} - - - -/** - * Initiate an asynchronous put. - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transferred - */ - -int mca_btl_udapl_put( - mca_btl_base_module_t* btl, - mca_btl_base_endpoint_t* endpoint, - mca_btl_base_descriptor_t* des) -{ - DAT_RMR_TRIPLET remote_buffer; - DAT_DTO_COOKIE cookie; - int rc = OMPI_SUCCESS; - - mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)des; - mca_btl_udapl_segment_t *dst_segment = des->des_dst; - - frag->btl = (mca_btl_udapl_module_t *)btl; - frag->endpoint = endpoint; - frag->type = MCA_BTL_UDAPL_PUT; - - if (OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], -1) < 0) { - /* no local work queue tokens available */ - OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], 1); - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - opal_list_append(&endpoint->endpoint_max_frags, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - opal_progress(); - } else { - /* work queue tokens available, try to send */ - - if(OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], -1) < 0) { - OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], 1); - OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], 1); - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - 
opal_list_append(&endpoint->endpoint_max_frags, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - opal_progress(); - } else { - frag->triplet.segment_length = frag->segment.base.seg_len; - - remote_buffer.rmr_context = dst_segment->context; - remote_buffer.target_address = - (DAT_VADDR)(uintptr_t)dst_segment->base.seg_addr.lval; - remote_buffer.segment_length = dst_segment->base.seg_len; - - cookie.as_ptr = frag; - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - rc = dat_ep_post_rdma_write(endpoint->endpoint_max, - 1, - &frag->triplet, - cookie, - &remote_buffer, - DAT_COMPLETION_DEFAULT_FLAG); - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_rdma_write", - major, minor)); - rc = OMPI_ERROR; - } - } - } - - return rc; -} - - - -/** - * Initiate an asynchronous get. - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transferred - * - */ - -int mca_btl_udapl_get( - mca_btl_base_module_t* btl, - mca_btl_base_endpoint_t* endpoint, - mca_btl_base_descriptor_t* des) -{ - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DEVELOPER, ("udapl_get\n")); - return OMPI_ERR_NOT_IMPLEMENTED; -} - -int mca_btl_udapl_ft_event(int state) { - if(OPAL_CRS_CHECKPOINT == state) { - ; - } - else if(OPAL_CRS_CONTINUE == state) { - ; - } - else if(OPAL_CRS_RESTART == state) { - ; - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/btl/udapl/btl_udapl.h b/ompi/mca/btl/udapl/btl_udapl.h deleted file mode 100644 index 94a0495de5..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl.h +++ /dev/null @@ -1,468 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and 
Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_BTL_UDAPL_H -#define MCA_BTL_UDAPL_H - -#include "ompi_config.h" -#include -#include -#include - -/* Open MPI includes */ -#include "opal/class/opal_pointer_array.h" -#include "opal/mca/event/event.h" -#include "ompi/class/ompi_free_list.h" -#include "ompi/mca/btl/btl.h" -#include "ompi/mca/btl/base/base.h" -#include "ompi/mca/btl/base/btl_base_error.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/mca/btl/btl.h" -#include "btl_udapl_endpoint.h" - -BEGIN_C_DECLS - - -/** - * uDAPL BTL component. - */ - -struct mca_btl_udapl_component_t { - mca_btl_base_component_2_0_0_t super; /**< base BTL component */ - - int32_t udapl_verbosity; /**< report out level, see - "Report Out from uDAPL BTL" below for details. 
*/ - size_t udapl_num_btls; /**< number of hcas available to the uDAPL component */ - unsigned int udapl_max_btls; /**< maximum number of supported hcas */ - struct mca_btl_udapl_module_t **udapl_btls; /**< array of available BTL modules */ - int32_t udapl_num_recvs; /**< number of recv buffers to keep posted */ - int32_t udapl_num_sends; /**< number of sends to post on endpoint */ - int32_t udapl_sr_win; /**< number of fragments recieved before - returning credits to sender */ - unsigned int udapl_timeout; /**< connection timeout, in microseconds */ - size_t udapl_eager_frag_size; - size_t udapl_max_frag_size; - size_t udapl_eager_rdma_frag_size; /* size of the rdma fragement including data - * payload space - */ - - int udapl_free_list_num; /**< initial size of free lists */ - int udapl_free_list_max; /**< maximum size of free lists */ - int udapl_free_list_inc; /**< number of elements to alloc when growing */ - int32_t udapl_use_eager_rdma; /**< turn rdma for small msgs on/off */ - int32_t udapl_eager_rdma_num; /**< number of rdma buffers allocated - for short messages */ - int32_t udapl_max_eager_rdma_peers; /**< maximum number of peers allowed to - use RDMA for short messages (cap) - */ - int32_t udapl_eager_rdma_win; /**< number of eager RDMA fragments - recieved before returning credits to - sender */ - int32_t udapl_conn_priv_data; /**< use connect priv data for proc data */ - int32_t udapl_async_events; /**< dequeue asynchronous events */ - int32_t udapl_buffer_alignment; /**< preferred communication buffer alignment, in bytes */ - opal_list_t udapl_procs; /**< list of udapl proc structures */ - opal_mutex_t udapl_lock; /**< lock for accessing module state */ - char* udapl_mpool_name; /**< name of memory pool */ - int32_t udapl_compare_subnet;/**< whether to compare with netmask or not */ - char *if_include; - char **if_include_list; - char *if_exclude; - char **if_exclude_list; - char **if_list; /* used for checking entries not found */ - int32_t 
ro_aware_system; /* default 0; 1 if relaxed ordered platform */ -}; -typedef struct mca_btl_udapl_component_t mca_btl_udapl_component_t; - -OMPI_MODULE_DECLSPEC extern mca_btl_udapl_component_t mca_btl_udapl_component; - - - -/** - * BTL Module Interface - */ -struct mca_btl_udapl_module_t { - mca_btl_base_module_t super; /**< base BTL interface */ - mca_btl_udapl_addr_t udapl_addr; - - /* uDAPL interface and other handles */ - DAT_IA_HANDLE udapl_ia; - DAT_PZ_HANDLE udapl_pz; - DAT_PSP_HANDLE udapl_psp; - DAT_IA_ATTR udapl_ia_attr; - - /* event dispatchers - async, data transfer, connection negotiation */ - DAT_EVD_HANDLE udapl_evd_async; - DAT_EVD_HANDLE udapl_evd_dto; - DAT_EVD_HANDLE udapl_evd_conn; - DAT_EP_PARAM udapl_ep_param; - - /* free list of fragment descriptors */ - ompi_free_list_t udapl_frag_eager; - ompi_free_list_t udapl_frag_eager_recv; - ompi_free_list_t udapl_frag_max; - ompi_free_list_t udapl_frag_max_recv; - ompi_free_list_t udapl_frag_user; - ompi_free_list_t udapl_frag_control; - - opal_mutex_t udapl_lock; /* lock for accessing module state */ - opal_mutex_t udapl_eager_rdma_lock; /* eager rdma lock */ - int32_t udapl_eager_rdma_endpoint_count; /* count of the number of - * endpoints in - * udapl_eager_rdma_endpoints - */ - opal_pointer_array_t *udapl_eager_rdma_endpoints; /* array of endpoints - * with eager rdma - * connections - */ - int32_t udapl_async_events; - int32_t udapl_connect_inprogress; - int32_t udapl_num_peers; - - /* module specific limits */ - int udapl_async_evd_qlen; - int udapl_conn_evd_qlen; - int udapl_dto_evd_qlen; - int udapl_max_request_dtos; /**< maximum number of outstanding consumer - submitted sends and rdma operations, see - section 6.6.6 of uDAPL Spec */ - int udapl_max_recv_dtos; /**< maximum number of outstanding consumer - submitted recv operations, see section - 6.6.6 of uDAPL Spec */ - uint32_t udapl_if_mask; /**< netmask value btl module */ -}; -typedef struct mca_btl_udapl_module_t 
mca_btl_udapl_module_t; -extern mca_btl_udapl_module_t mca_btl_udapl_module; - -struct mca_btl_udapl_reg_t { - mca_mpool_base_registration_t base; - DAT_LMR_HANDLE lmr; /* local memory region (LMR) */ - DAT_LMR_TRIPLET lmr_triplet; /* LMR triplet - context, address, length */ - DAT_RMR_CONTEXT rmr_context; /* remote memory region context handle */ - -}; -typedef struct mca_btl_udapl_reg_t mca_btl_udapl_reg_t; - -/** - * Report Out from uDAPL BTL - * - * - BTL_ERROR() : Use to report out errors from uDAPL BTL. These are - * critical errors which will most likely cause the program to fail so - * this message should always be reported to the user. Defined in - * btl/base/btl_base_error.h. - * Example: - * dat_strerror(rc, (const char**)&major, (const char**)&minor); - * BTL_ERROR(("ERROR: %s %s %s\n", "dat_cr_accept", major, minor)); - * - * - BTL_UDAPL_VERBOSE_OUTPUT() : Use to output different levels - * of verbosity to the user. See Note below. - * Example: - * BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE, - * ("WARNING: don't %s", "jump")); - * - * - BTL_UDAPL_VERBOSE_HELP() : Use output information as defined in - * uDAPL BTL help file (help-mpi-btl-udapl.txt). See Note below. - * Example: - * BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - * ("help-mpi-btl-udapl.txt", - * "invalid num rdma segments", true, 22)); - * - * Note : - Verbose levels are defined below. These levels are - * controlled by the mca parameter "btl_udapl_verbose". - * The verbose level is set to 10 by default so that critical - * error and useful help information will appear. Which ever value - * this param is set to, those messages as well as any lower level - * verbose messages will be reported. - * - Setting "btl_udapl_verbose" to "-1" will turn off all - * messages reported by the use of BTL_UDAPL_VERBOSE_*(). - * - These macros should not be used in a critical path as they - * are always included in the compiled code. 
- * - These macros rely on the use of paranthesis around the "args" - * value. - * - * Values used with BTL_UDAPL_VERBOSE_*(): - * - * - 0: critical user information; should always be reported; - * on by default - * - 10: useful help messages that would be reported from - * "help-mpi-btl-udapl.txt"; accessed from - * BTL_UDAPL_VERBOSE_HELP(); on by default - * - 20: general execution diagnostic information; - * may be useful to user or btl developer - * - 30: basic debugging/diagnostic information - * - 90: useful only to developers - * - 100: other components do not appear to go beyond 100 for verbose - * levels so noting here as the max for future reference - */ -#define VERBOSE_CRITICAL 0 -#define VERBOSE_SHOW_HELP 10 -#define VERBOSE_INFORM 20 -#define VERBOSE_DIAGNOSE 30 -#define VERBOSE_DEVELOPER 90 - -#define BTL_UDAPL_VERBOSE_OUTPUT(verbose_level, args) \ -do { \ - if (verbose_level <= mca_btl_udapl_component.udapl_verbosity) { \ - BTL_OUTPUT(args); \ - } \ -} while(0); - -#define BTL_UDAPL_VERBOSE_HELP(verbose_level, args) \ -do { \ - if (verbose_level <= mca_btl_udapl_component.udapl_verbosity) { \ - opal_show_help args; \ - } \ -} while(0); - - -/* - * Report a uDAPL error - for debugging - */ - -#if OPAL_ENABLE_DEBUG -extern void mca_btl_udapl_error(DAT_RETURN ret, char* str); - -#define MCA_BTL_UDAPL_ERROR(ret, str) \ - mca_btl_udapl_error((ret), (str)); - -#else -#define MCA_BTL_UDAPL_ERROR(ret, str) -#endif - - -/** - * Register uDAPL component parameters with the MCA framework - */ -extern int mca_btl_udapl_component_open(void); - -/** - * Any final cleanup before being unloaded. - */ -extern int mca_btl_udapl_component_close(void); - -/** - * uDAPL component initialization. - * - * @param num_btl_modules (OUT) Number of BTLs returned in BTL array. 
- * @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE) - * @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE) - */ -extern mca_btl_base_module_t** mca_btl_udapl_component_init( - int *num_btl_modules, - bool allow_multi_user_threads, - bool have_hidden_threads -); - - -/** - * uDAPL component progress. - */ - -extern int mca_btl_udapl_component_progress(void); - - -/** - * Initialize resources for a new BTL/uDAPL IA - * - * @param ia_name Name of uDAPL interface adapter - * @param btl BTL instance. - * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_btl_udapl_init( - DAT_NAME_PTR ia_name, - struct mca_btl_udapl_module_t* btl -); - - -/** - * Cleanup any resources held by the BTL. - * - * @param btl BTL instance. - * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_btl_udapl_finalize( - struct mca_btl_base_module_t* btl -); - - -/** - * PML->BTL notification of change in the process list. - * - * @param btl (IN) - * @param nprocs (IN) Number of processes - * @param procs (IN) Set of processes - * @param peers (OUT) Set of (optional) peer addressing info. - * @param peers (IN/OUT) Set of processes that are reachable via this BTL. - * @return OMPI_SUCCESS or error status on failure. - * - */ - -extern int mca_btl_udapl_add_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_btl_base_endpoint_t** peers, - opal_bitmap_t* reachable -); - -/** - * PML->BTL notification of change in the process list. - * - * @param btl (IN) BTL instance - * @param nproc (IN) Number of processes. - * @param procs (IN) Set of processes. - * @param peers (IN) Set of peer data structures. 
- * @return Status indicating if cleanup was successful - * - */ - -extern int mca_btl_udapl_del_procs( - struct mca_btl_base_module_t* btl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_btl_base_endpoint_t** peers -); - - -/** - * Initiate an asynchronous send. - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transferred - * @param tag (IN) The tag value used to notify the peer. - */ - -extern int mca_btl_udapl_send( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* btl_peer, - struct mca_btl_base_descriptor_t* descriptor, - mca_btl_base_tag_t tag -); - - -/** - * Initiate an asynchronous put. - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transferred - */ - -extern int mca_btl_udapl_put( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* btl_peer, - struct mca_btl_base_descriptor_t* decriptor -); - - -/** - * Initiate an asynchronous get. - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transferred - */ - -extern int mca_btl_udapl_get( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* btl_peer, - struct mca_btl_base_descriptor_t* decriptor -); - -/** - * Allocate a descriptor with a segment of the requested size. - * Note that the BTL layer may choose to return a smaller size - * if it cannot support the request. - * - * @param btl (IN) BTL module - * @param size (IN) Request segment size. - */ - -extern mca_btl_base_descriptor_t* mca_btl_udapl_alloc( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - uint8_t order, - size_t size, - uint32_t flags); - - -/** - * Return a segment allocated by this BTL. 
- * - * @param btl (IN) BTL module - * @param descriptor (IN) Allocated descriptor. - */ - -extern int mca_btl_udapl_free( - struct mca_btl_base_module_t* btl, - mca_btl_base_descriptor_t* des); - - -/** - * Prepare a descriptor for send/rdma using the supplied - * convertor. If the convertor references data that is contigous, - * the descriptor may simply point to the user buffer. Otherwise, - * this routine is responsible for allocating buffer space and - * packing if required. - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL peer addressing - * @param convertor (IN) Data type convertor - * @param reserve (IN) Additional bytes requested by upper layer to precede user data - * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) -*/ - -mca_btl_base_descriptor_t* mca_btl_udapl_prepare_src( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* peer, - struct mca_mpool_base_registration_t*, - struct opal_convertor_t* convertor, - uint8_t order, - size_t reserve, - size_t* size, - uint32_t flags -); - -extern mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst( - struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* peer, - struct mca_mpool_base_registration_t*, - struct opal_convertor_t* convertor, - uint8_t order, - size_t reserve, - size_t* size, - uint32_t flags); - - /** - * Fault Tolerance Event Notification Function - * @param state Checkpoint Stae - * @return OMPI_SUCCESS or failure status - */ -int mca_btl_udapl_ft_event(int state); - -END_C_DECLS -#endif diff --git a/ompi/mca/btl/udapl/btl_udapl_component.c b/ompi/mca/btl/udapl/btl_udapl_component.c deleted file mode 100644 index 7498801b94..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_component.c +++ /dev/null @@ -1,1223 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Sandia National Laboratories. All rights - * reserved. - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "opal/mca/event/event.h" -#include "opal/util/argv.h" -#include "ompi/mca/btl/btl.h" - -#include "ompi/mca/mpool/base/base.h" -#include "btl_udapl.h" -#include "btl_udapl_frag.h" -#include "btl_udapl_endpoint.h" -#include "btl_udapl_mca.h" -#include "btl_udapl_proc.h" -#include "ompi/mca/btl/base/base.h" -#include "ompi/mca/btl/base/btl_base_error.h" -#include "btl_udapl_endpoint.h" -#include "ompi/runtime/ompi_module_exchange.h" -#include "ompi/runtime/mpiruntime.h" - -/* - * Local Functions - */ -static inline int mca_btl_udapl_frag_progress_one(mca_btl_udapl_module_t* udapl_btl, - mca_btl_udapl_frag_t* frag); -void mca_btl_udapl_frag_progress_pending(mca_btl_udapl_module_t* udapl_btl, - mca_btl_base_endpoint_t* endpoint, - const int connection); -static int mca_btl_udapl_modify_ia_list(DAT_COUNT *num_info_entries, - DAT_PROVIDER_INFO* datinfo); -static const char* -mca_btl_udapl_dat_event_to_string(DAT_EVENT_NUMBER event_number); - -mca_btl_udapl_component_t mca_btl_udapl_component = { - { - /* First, the mca_base_component_t struct containing meta information - about the component itself */ - { - MCA_BTL_BASE_VERSION_2_0_0, - - "udapl", /* MCA component name */ - OMPI_MAJOR_VERSION, /* MCA component major version */ - 
OMPI_MINOR_VERSION, /* MCA component minor version */ - OMPI_RELEASE_VERSION, /* MCA component release version */ - mca_btl_udapl_component_open, /* component open */ - mca_btl_udapl_component_close, /* component close */ - NULL, - mca_btl_udapl_register_mca_params - }, - { - /* The component is not checkpoint ready */ - MCA_BASE_METADATA_PARAM_NONE - }, - - mca_btl_udapl_component_init, - mca_btl_udapl_component_progress, - } -}; - - -/* - * Predefined and fixed size structure containing DAT_EVENT values - * and associated string as defined in: "uDAPL:User Direct Access - * Programming Library v1.2 Sept 15, 2004", DAT Collaborative Organization. - */ -static struct mca_btl_udapl_dat_events { - DAT_EVENT_NUMBER value; - const char* name; -} mca_btl_udapl_dat_events[] = { - { DAT_DTO_COMPLETION_EVENT, - "DAT_DTO_COMPLETION_EVENT" }, - { DAT_RMR_BIND_COMPLETION_EVENT, - "DAT_RMR_BIND_COMPLETION_EVENT" }, - { DAT_CONNECTION_REQUEST_EVENT, - "DAT_CONNECTION_REQUEST_EVENT" }, - { DAT_CONNECTION_EVENT_ESTABLISHED, - "DAT_CONNECTION_EVENT_ESTABLISHED" }, - { DAT_CONNECTION_EVENT_PEER_REJECTED, - "DAT_CONNECTION_EVENT_PEER_REJECTED" }, - { DAT_CONNECTION_EVENT_NON_PEER_REJECTED, - "DAT_CONNECTION_EVENT_NON_PEER_REJECTED" }, - { DAT_CONNECTION_EVENT_ACCEPT_COMPLETION_ERROR, - "DAT_CONNECTION_EVENT_ACCEPT_COMPLETION_ERROR" }, - { DAT_CONNECTION_EVENT_DISCONNECTED, - "DAT_CONNECTION_EVENT_DISCONNECTED" }, - { DAT_CONNECTION_EVENT_BROKEN, - "DAT_CONNECTION_EVENT_BROKEN" }, - { DAT_CONNECTION_EVENT_TIMED_OUT, - "DAT_CONNECTION_EVENT_TIMED_OUT" }, - { DAT_CONNECTION_EVENT_UNREACHABLE, - "DAT_CONNECTION_EVENT_UNREACHABLE" }, - { DAT_ASYNC_ERROR_EVD_OVERFLOW, - "DAT_ASYNC_ERROR_EVD_OVERFLOW" }, - { DAT_ASYNC_ERROR_IA_CATASTROPHIC, - "DAT_ASYNC_ERROR_IA_CATASTROPHIC" }, - { DAT_ASYNC_ERROR_EP_BROKEN, - "DAT_ASYNC_ERROR_EP_BROKEN" }, - { DAT_ASYNC_ERROR_TIMED_OUT, - "DAT_ASYNC_ERROR_TIMED_OUT" }, - { DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR, - 
"DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR" }, - { DAT_SOFTWARE_EVENT, - "DAT_SOFTWARE_EVENT" } -}; - - -/* - * Function to convert DAT_EVENT_NUMBER into a readable string. - * - * @param event_number (IN) DAT_EVENT_NUMBER value - * - * @return event string or a string indicating - * event number is invalid - */ -static const char * -mca_btl_udapl_dat_event_to_string(DAT_EVENT_NUMBER event_number) -{ - int i; - int num_events = (sizeof(mca_btl_udapl_dat_events) / - sizeof(mca_btl_udapl_dat_events[0])); - - for (i = 0; i < num_events; i++) { - if (mca_btl_udapl_dat_events[i].value == event_number) { - return (mca_btl_udapl_dat_events[i].name); - } - } - - return ("Unknown DAT Event Number"); -} - - -/** - * Report a uDAPL error - for debugging - */ - -#if OPAL_ENABLE_DEBUG -void -mca_btl_udapl_error(DAT_RETURN ret, char* str) -{ - char* major; - char* minor; - - if(DAT_SUCCESS != dat_strerror(ret, - (const char**)&major, (const char**)&minor)) - { - printf("dat_strerror failed! ret is %d\n", ret); - exit(-1); - } - - OPAL_OUTPUT((0, "ERROR: %s %s %s\n", str, major, minor)); -} -#endif - - -/* - * Called by MCA framework to open the component, registers - * component parameters. 
- */ -int mca_btl_udapl_component_open(void) -{ - int rc = OMPI_SUCCESS; - - /* initialize state */ - mca_btl_udapl_component.udapl_num_btls=0; - mca_btl_udapl_component.udapl_btls=NULL; - mca_btl_udapl_component.ro_aware_system=0; - - /* initialize objects */ - OBJ_CONSTRUCT(&mca_btl_udapl_component.udapl_procs, opal_list_t); - OBJ_CONSTRUCT(&mca_btl_udapl_component.udapl_lock, opal_mutex_t); - - /* compute udapl_eager_frag_size and udapl_max_frag_size */ - mca_btl_udapl_component.udapl_eager_frag_size = - mca_btl_udapl_module.super.btl_eager_limit; - mca_btl_udapl_module.super.btl_eager_limit -= - (sizeof(mca_btl_udapl_footer_t) + sizeof(mca_btl_udapl_rdma_footer_t)); - - mca_btl_udapl_component.udapl_max_frag_size = - mca_btl_udapl_module.super.btl_max_send_size; - mca_btl_udapl_module.super.btl_max_send_size -= - (sizeof(mca_btl_udapl_footer_t) + sizeof(mca_btl_udapl_rdma_footer_t)); - - /* compute udapl_eager_rdma_frag_size */ - mca_btl_udapl_component.udapl_eager_rdma_frag_size = - sizeof(mca_btl_udapl_frag_eager_rdma_t) + - mca_btl_udapl_component.udapl_eager_frag_size; - - mca_btl_udapl_module.super.btl_seg_size = sizeof (mca_btl_udapl_segment_t); - - return rc; -} - - -/* - * component cleanup - sanity checking of queue lengths - */ - -int mca_btl_udapl_component_close(void) -{ - /* TODO - what needs to be done here? */ - return OMPI_SUCCESS; -} - - -/* - * Register uDAPL component addressing information. The MCA framework - * will make this available to all peers. 
- */ - -static int -mca_btl_udapl_modex_send(void) -{ - int rc; - size_t i; - size_t size; - mca_btl_udapl_addr_t *addrs = NULL; - - size = sizeof(mca_btl_udapl_addr_t) * - mca_btl_udapl_component.udapl_num_btls; - - if (0 != size) { - addrs = (mca_btl_udapl_addr_t*)malloc(size); - if (NULL == addrs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - memset(addrs, 0, size); - - for (i = 0; i < mca_btl_udapl_component.udapl_num_btls; i++) { - mca_btl_udapl_module_t* btl = mca_btl_udapl_component.udapl_btls[i]; - addrs[i] = btl->udapl_addr; - } - } - - rc = ompi_modex_send( - &mca_btl_udapl_component.super.btl_version, addrs, size); - if (NULL != addrs) { - free (addrs); - } - return rc; -} - - -/* - * Callback function used for udapl btl internal control messages. - * - * @param btl (IN) BTL module - * @param tag (IN) Not used but part of callback interface - * @param descriptor (IN) Description of the data that was just transferred - * @param cbdata (IN) Data used by call back function. Not used. 
- * - */ -static void mca_btl_udapl_receive_control(struct mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata) -{ - mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)descriptor; - mca_btl_udapl_endpoint_t* endpoint = frag->endpoint; - mca_btl_udapl_control_header_t* ctl_hdr = - frag->segment.base.seg_addr.pval; - - switch (ctl_hdr->type) { - case MCA_BTL_UDAPL_CONTROL_RDMA_CONNECT: - { - mca_btl_udapl_eager_rdma_connect_t* rdma_connect = - frag->segment.base.seg_addr.pval; - - if (endpoint->endpoint_eager_rdma_remote.base.pval) { - BTL_ERROR(("ERROR: Received RDMA connect twice!")); - return; - } - endpoint->endpoint_eager_rdma_remote.rkey = rdma_connect->rkey; - endpoint->endpoint_eager_rdma_remote.base.pval = - rdma_connect->rdma_start.pval; - - OPAL_THREAD_ADD32(&(endpoint->endpoint_eager_rdma_remote.tokens), - mca_btl_udapl_component.udapl_eager_rdma_num); - - break; - } - case MCA_BTL_UDAPL_CONTROL_RDMA_CREDIT: - { - mca_btl_udapl_eager_rdma_credit_t* rdma_credit = - frag->segment.base.seg_addr.pval; - - /* don't return credits used for rdma credit control message */ - OPAL_THREAD_ADD32( - &(endpoint->endpoint_sr_credits[BTL_UDAPL_EAGER_CONNECTION]), - -1); - - OPAL_THREAD_ADD32(&(endpoint->endpoint_eager_rdma_remote.tokens), - rdma_credit->credits); - - break; - } - case MCA_BTL_UDAPL_CONTROL_SR_CREDIT: - { - mca_btl_udapl_sr_credit_t* sr_credit = - frag->segment.base.seg_addr.pval; - - /* don't return credits used for sr credit control message */ - OPAL_THREAD_ADD32( - &(endpoint->endpoint_sr_credits[sr_credit->connection]), -1); - - OPAL_THREAD_ADD32( - &(endpoint->endpoint_sr_tokens[sr_credit->connection]), - sr_credit->credits); - - break; - } - default: - BTL_ERROR(("ERROR: Unknown contrl message type received by BTL")); - break; - } -} - - -/* - * Modify the list of dat entry pointers to include only those entries - * which it is desired to attempt dat_ia_open on. 
- * - * @param num_info_entries (IN/OUT) Number of entries in datinfo list - * @param datinfo (IN/OUT) List of pointers to dat registry entries - */ - -static int mca_btl_udapl_modify_ia_list(DAT_COUNT *num_info_entries, - DAT_PROVIDER_INFO* datinfo) -{ - int i,j,k,found; - DAT_PROVIDER_INFO* tmp_datinfo = NULL; - DAT_COUNT tmp_num_entries = 0; - - - tmp_datinfo = malloc((*num_info_entries) * sizeof(DAT_PROVIDER_INFO)); - if(NULL == tmp_datinfo) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < *num_info_entries; i++) { - j = 0; - found = 0; - - /* search for datinfo entry on the if_list list */ - while (mca_btl_udapl_component.if_list[j]) { - if (0 == strcmp(datinfo[i].ia_name, - mca_btl_udapl_component.if_list[j])) { - - found = 1; - /* remove from if_list */ - k = opal_argv_count(mca_btl_udapl_component.if_list); - opal_argv_delete(&k, &(mca_btl_udapl_component.if_list), - j, 1); - - break; - } - j++; - } - - if (found) { - if (NULL != mca_btl_udapl_component.if_include_list) { - /* explicitly include */ - tmp_datinfo[tmp_num_entries] = datinfo[i]; - tmp_num_entries++; - } - - /* if this is if_exclude case and match found do nothing */ - - } else { - /* if this is if_include case and match not found do nothing */ - - if (NULL != mca_btl_udapl_component.if_exclude_list) { - /* not found for exclude case so actually include here */ - tmp_datinfo[tmp_num_entries] = datinfo[i]; - tmp_num_entries++; - } - } - } - - /* set new values */ - *num_info_entries = tmp_num_entries; - for (j = 0; j < *num_info_entries; j++) { - datinfo[j] = tmp_datinfo[j]; - } - - - /* if if_list not NULL, either not found or user error */ - if (opal_argv_count(mca_btl_udapl_component.if_list)) { - char *str = opal_argv_join(mca_btl_udapl_component.if_list, ','); - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", "nonexistent entry", - true, ompi_process_info.nodename, - ((NULL != mca_btl_udapl_component.if_include) ? 
- "in" : "ex"), str)); - free(str); - } - - free(tmp_datinfo); - return OMPI_SUCCESS; -} - - -/* - * Initialize the uDAPL component, - * check how many interfaces are available and create a btl module for each. - */ - -mca_btl_base_module_t ** -mca_btl_udapl_component_init (int *num_btl_modules, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - DAT_PROVIDER_INFO* datinfo; - DAT_PROVIDER_INFO** datinfoptr; - mca_btl_base_module_t **btls; - mca_btl_udapl_module_t *btl; - DAT_COUNT num_ias; - int32_t i; - - /* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */ - if (ompi_mpi_thread_multiple && !mca_btl_base_thread_multiple_override) { - mca_btl_udapl_component.udapl_num_btls = 0; - mca_btl_udapl_modex_send(); - return NULL; - } - - /* parse the include and exclude lists, checking for errors */ - mca_btl_udapl_component.if_include_list = - mca_btl_udapl_component.if_exclude_list = - mca_btl_udapl_component.if_list = NULL; - if (NULL != mca_btl_udapl_component.if_include && - NULL != mca_btl_udapl_component.if_exclude) { - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "specified include and exclude", true, - mca_btl_udapl_component.if_include, - mca_btl_udapl_component.if_exclude)); - mca_btl_udapl_component.udapl_num_btls = 0; - mca_btl_udapl_modex_send(); - return NULL; - } else if (NULL != mca_btl_udapl_component.if_include) { - mca_btl_udapl_component.if_include_list = - opal_argv_split(mca_btl_udapl_component.if_include, ','); - mca_btl_udapl_component.if_list = - opal_argv_copy(mca_btl_udapl_component.if_include_list); - - } else if (NULL != mca_btl_udapl_component.if_exclude) { - mca_btl_udapl_component.if_exclude_list = - opal_argv_split(mca_btl_udapl_component.if_exclude, ','); - mca_btl_udapl_component.if_list = - opal_argv_copy(mca_btl_udapl_component.if_exclude_list); - } - - /* enumerate uDAPL interfaces */ - /* Have to do weird pointer stuff to make uDAPL happy - - just an array of DAT_PROVIDER_INFO isn't 
good enough. */ - datinfo = malloc(sizeof(DAT_PROVIDER_INFO) * - mca_btl_udapl_component.udapl_max_btls); - datinfoptr = malloc(sizeof(DAT_PROVIDER_INFO*) * - mca_btl_udapl_component.udapl_max_btls); - if(NULL == datinfo || NULL == datinfoptr) { - return NULL; - } - - for(i = 0; i < (int32_t)mca_btl_udapl_component.udapl_max_btls; i++) { - datinfoptr[i] = &datinfo[i]; - } - - if(DAT_SUCCESS != dat_registry_list_providers( - mca_btl_udapl_component.udapl_max_btls, - (DAT_COUNT*)&num_ias, datinfoptr)) { - free(datinfo); - free(datinfoptr); - return NULL; - } - - free(datinfoptr); - - /* modify list of IA's to be used when if_in[ex]clude set */ - if (NULL != mca_btl_udapl_component.if_list) { - mca_btl_udapl_modify_ia_list(&num_ias, datinfo); - } - - /* allocate space for the each possible BTL */ - mca_btl_udapl_component.udapl_btls = (mca_btl_udapl_module_t **) - malloc(num_ias * sizeof(mca_btl_udapl_module_t *)); - if(NULL == mca_btl_udapl_component.udapl_btls) { - free(datinfo); - return NULL; - } - - /* create a BTL module for each interface */ - for(mca_btl_udapl_component.udapl_num_btls = i = 0; i < num_ias; i++) { - btl = malloc(sizeof(mca_btl_udapl_module_t)); - if(NULL == btl) { - free(datinfo); - free(mca_btl_udapl_component.udapl_btls); - return NULL; - } - - /* copy default values into the new BTL */ - memcpy(btl, &mca_btl_udapl_module, sizeof(mca_btl_udapl_module_t)); - - /* initialize this BTL */ - /* TODO - make use of the thread-safety info in datinfo also */ - if(OMPI_SUCCESS != mca_btl_udapl_init(datinfo[i].ia_name, btl)) { - free(btl); - continue; - } - - /* register internal control message callback */ - mca_btl_base_active_message_trigger[MCA_BTL_TAG_UDAPL].cbfunc = mca_btl_udapl_receive_control; - mca_btl_base_active_message_trigger[MCA_BTL_TAG_UDAPL].cbdata = NULL; - - /* successful btl creation */ - mca_btl_udapl_component.udapl_btls[mca_btl_udapl_component.udapl_num_btls] = btl; - if(++mca_btl_udapl_component.udapl_num_btls >= - 
mca_btl_udapl_component.udapl_max_btls) { - break; - } - } - - /* finished with datinfo */ - free(datinfo); - - /* Make sure we have some interfaces */ - if(0 == mca_btl_udapl_component.udapl_num_btls) { - mca_btl_base_error_no_nics("uDAPL", "NIC"); - free(mca_btl_udapl_component.udapl_btls); - return NULL; - } - - /* publish uDAPL parameters with the MCA framework */ - if (OMPI_SUCCESS != mca_btl_udapl_modex_send()) { - free(mca_btl_udapl_component.udapl_btls); - return NULL; - } - - /* Post OOB receive */ - mca_btl_udapl_endpoint_post_oob_recv(); - - /* return array of BTLs */ - btls = (mca_btl_base_module_t**) malloc(sizeof(mca_btl_base_module_t *) * - mca_btl_udapl_component.udapl_num_btls); - if (NULL == btls) { - free(mca_btl_udapl_component.udapl_btls); - return NULL; - } - - memcpy(btls, mca_btl_udapl_component.udapl_btls, - mca_btl_udapl_component.udapl_num_btls * - sizeof(mca_btl_udapl_module_t *)); - *num_btl_modules = mca_btl_udapl_component.udapl_num_btls; - return btls; -} - - -static int mca_btl_udapl_accept_connect(mca_btl_udapl_module_t* btl, - DAT_CR_HANDLE cr_handle) -{ - DAT_EP_HANDLE ep; - int rc; - mca_btl_base_endpoint_t* proc_ep; - mca_btl_udapl_addr_t priv_data_in_addr; - int32_t priv_data_in_conn_type; /* incoming endpoint type */ - - if (mca_btl_udapl_component.udapl_conn_priv_data) { - DAT_CR_PARAM cr_param; - - /* query the connection request for incoming private data */ - rc = dat_cr_query(cr_handle, - DAT_CR_FIELD_ALL, - &cr_param); - if (rc != DAT_SUCCESS) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_cr_query", - major, minor)); - return OMPI_ERROR; - } - - /* retrieve data from connection request event; - * cr_param contains remote_port_qual but we need to - * match on the psp port and address of remote - * so we get this from the private data. 
- */ - memcpy(&priv_data_in_addr, - (mca_btl_udapl_addr_t *)cr_param.private_data, - sizeof(mca_btl_udapl_addr_t)); - priv_data_in_conn_type = *(int32_t *) - ((char *)cr_param.private_data + sizeof(mca_btl_udapl_addr_t)); - } - - /* create the endpoint for the incoming connection */ - rc = mca_btl_udapl_endpoint_create(btl, &ep); - if(OMPI_SUCCESS != rc) { - BTL_ERROR(("ERROR: mca_btl_udapl_endpoint_create")); - return OMPI_ERROR; - } - - /* cr_param no longer valid once dat_cr_accept called */ - rc = dat_cr_accept(cr_handle, ep, 0, NULL); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_cr_accept", - major, minor)); - return OMPI_ERROR; - } - - if (mca_btl_udapl_component.udapl_conn_priv_data) { - /* With accept now in process find a home for the DAT ep by - * matching against the private data that came in on the - * connection request event - */ - - /* find the endpoint which matches the address in data received */ - proc_ep = - mca_btl_udapl_find_endpoint_address_match(btl, priv_data_in_addr); - - if (proc_ep == NULL) { - return OMPI_ERROR; - } - - if (BTL_UDAPL_EAGER_CONNECTION == priv_data_in_conn_type) { - proc_ep->endpoint_eager = ep; - } else { - assert(BTL_UDAPL_MAX_CONNECTION == priv_data_in_conn_type); - proc_ep->endpoint_max = ep; - } - } - - return OMPI_SUCCESS; -} - - -static inline int mca_btl_udapl_sendrecv(mca_btl_udapl_module_t* btl, - DAT_EP_HANDLE* endpoint) -{ - int rc; - mca_btl_udapl_frag_t* frag; - DAT_DTO_COOKIE cookie; - static int32_t connection_seq = 1; - uint32_t flags = 0; - mca_btl_base_endpoint_t* btl_endpoint = NULL; /* endpoint required by - * mca_btl_udapl_alloc has not - * been created at this point - */ - - /* Post a receive to get the peer's address data */ - frag = (mca_btl_udapl_frag_t*) - mca_btl_udapl_alloc( - &btl->super, - btl_endpoint, - MCA_BTL_NO_ORDER, - sizeof(mca_btl_udapl_addr_t) + - sizeof(int32_t), - 
flags); - cookie.as_ptr = frag; - - frag->type = MCA_BTL_UDAPL_CONN_RECV; - - rc = dat_ep_post_recv(endpoint, 1, - &frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_recv", - major, minor)); - return OMPI_ERROR; - } - - - /* Send our local address data over this EP */ - frag = (mca_btl_udapl_frag_t*) - mca_btl_udapl_alloc( - &btl->super, - btl_endpoint, - MCA_BTL_NO_ORDER, - sizeof(mca_btl_udapl_addr_t) + - sizeof(int32_t), - flags); - cookie.as_ptr = frag; - - memcpy(frag->segment.base.seg_addr.pval, - &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t)); - memcpy((char *)frag->segment.base.seg_addr.pval + sizeof(mca_btl_udapl_addr_t), - &connection_seq, sizeof(int32_t)); - connection_seq++; - - frag->type = MCA_BTL_UDAPL_CONN_SEND; - - rc = dat_ep_post_send(endpoint, 1, - &frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_send", - major, minor)); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - -static inline int mca_btl_udapl_frag_progress_one( - mca_btl_udapl_module_t* udapl_btl, - mca_btl_udapl_frag_t* frag) -{ - int rc; - - switch(frag->type) { - case MCA_BTL_UDAPL_SEND: - rc = mca_btl_udapl_endpoint_send(frag->endpoint, frag); - break; - case MCA_BTL_UDAPL_PUT: - rc = mca_btl_udapl_put(&udapl_btl->super, - frag->endpoint, - &frag->base); - break; - default: - rc = OMPI_ERROR; - BTL_ERROR(("Error : Progressing pending operation, invalid type %d\n", - frag->type)); - break; - } - - return rc; -} - -void mca_btl_udapl_frag_progress_pending(mca_btl_udapl_module_t* udapl_btl, - mca_btl_base_endpoint_t* endpoint, - const int connection) -{ - int len; - int i; - int token_avail; - mca_btl_udapl_frag_t* frag; - - if 
(BTL_UDAPL_EAGER_CONNECTION == connection) { - len = opal_list_get_size(&endpoint->endpoint_eager_frags); - - /* progress eager frag queue as needed */ - BTL_UDAPL_TOKEN_AVAIL(endpoint, connection, token_avail); - - for(i = 0; i < len && token_avail > 0; i++) { - - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - frag = (mca_btl_udapl_frag_t*)opal_list_remove_first(&(endpoint->endpoint_eager_frags)); - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - if(NULL == frag) { - return; - } - if(mca_btl_udapl_frag_progress_one(udapl_btl, frag) != - OMPI_SUCCESS) { - BTL_ERROR(("ERROR: Not able to progress on connection(%d)\n", - BTL_UDAPL_EAGER_CONNECTION)); - return; - } - BTL_UDAPL_TOKEN_AVAIL(endpoint, connection, token_avail); - } - - } else if (BTL_UDAPL_MAX_CONNECTION == connection) { - len = opal_list_get_size(&endpoint->endpoint_max_frags); - - BTL_UDAPL_TOKEN_AVAIL(endpoint, connection, token_avail); - - /* progress max frag queue as needed */ - for(i = 0; i < len && token_avail > 0; i++) { - - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - frag = (mca_btl_udapl_frag_t*)opal_list_remove_first(&(endpoint->endpoint_max_frags)); - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - if(NULL == frag) { - return; - } - if(mca_btl_udapl_frag_progress_one(udapl_btl, frag) != - OMPI_SUCCESS) { - BTL_ERROR(("ERROR: Not able to progress on connection(%d)\n", - BTL_UDAPL_MAX_CONNECTION)); - return; - } - BTL_UDAPL_TOKEN_AVAIL(endpoint, connection, token_avail); - } - - } else { - BTL_ERROR(("ERROR: Can not progress pending fragment on unknown connection\n")); - } - return; -} - -/* - * uDAPL component progress. 
- */ - -int mca_btl_udapl_component_progress() -{ - mca_btl_udapl_module_t* btl; - static int32_t inprogress = 0; - DAT_EVENT event; - size_t i; - int32_t j, rdma_ep_count; - int count = 0, btl_ownership; - mca_btl_udapl_frag_t* frag; - mca_btl_base_endpoint_t* endpoint; - - /* prevent deadlock - only one thread should be 'progressing' at a time */ - if(OPAL_THREAD_ADD32(&inprogress, 1) > 1) { - OPAL_THREAD_ADD32(&inprogress, -1); - return OMPI_SUCCESS; - } - - /* check for work to do on each uDAPL btl */ - OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); - for(i = 0; i < mca_btl_udapl_component.udapl_num_btls; i++) { - btl = mca_btl_udapl_component.udapl_btls[i]; - - /* Check DTO EVD */ - while(DAT_SUCCESS == - dat_evd_dequeue(btl->udapl_evd_dto, &event)) { - DAT_DTO_COMPLETION_EVENT_DATA* dto; - - switch(event.event_number) { - case DAT_DTO_COMPLETION_EVENT: - dto = &event.event_data.dto_completion_event_data; - - frag = dto->user_cookie.as_ptr; - - /* Was the DTO successful? */ - if(DAT_DTO_SUCCESS != dto->status) { - - if (DAT_DTO_ERR_FLUSHED == dto->status) { - - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM, - ("DAT_DTO_ERR_FLUSHED: probably OK if occurs during MPI_Finalize().\n")); - } else { - - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, - ("ERROR: DAT_DTO_COMPLETION_EVENT: %d %d %lu %p.\n", - dto->status, frag->type, - (unsigned long)frag->size, dto->ep_handle)); - } - return OMPI_ERROR; - } - endpoint = frag->endpoint; - btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - - switch(frag->type) { - case MCA_BTL_UDAPL_RDMA_WRITE: - { - assert(frag->base.des_src == &frag->segment); - assert(frag->base.des_src_cnt == 1); - assert(frag->base.des_dst == NULL); - assert(frag->base.des_dst_cnt == 0); - assert(frag->type == MCA_BTL_UDAPL_RDMA_WRITE); - - frag->base.des_cbfunc(&btl->super, endpoint, - &frag->base, OMPI_SUCCESS); - if( btl_ownership ) { - mca_btl_udapl_free(&btl->super, - &frag->base); - } - - 
OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[BTL_UDAPL_EAGER_CONNECTION]), 1); - - mca_btl_udapl_frag_progress_pending(btl, - endpoint, BTL_UDAPL_EAGER_CONNECTION); - - break; - } - case MCA_BTL_UDAPL_SEND: - { - int connection = BTL_UDAPL_EAGER_CONNECTION; - - assert(frag->base.des_src == &frag->segment); - assert(frag->base.des_src_cnt == 1); - assert(frag->base.des_dst == NULL); - assert(frag->base.des_dst_cnt == 0); - assert(frag->type == MCA_BTL_UDAPL_SEND); - - if(frag->size != - mca_btl_udapl_component.udapl_eager_frag_size) { - assert(frag->size == - mca_btl_udapl_component.udapl_max_frag_size); - - connection = BTL_UDAPL_MAX_CONNECTION; - } - frag->base.des_cbfunc(&btl->super, endpoint, - &frag->base, OMPI_SUCCESS); - if( btl_ownership ) { - mca_btl_udapl_free(&btl->super, - &frag->base); - } - - OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[connection]), 1); - - mca_btl_udapl_frag_progress_pending(btl, - endpoint, connection); - break; - } - case MCA_BTL_UDAPL_RECV: - { - mca_btl_active_message_callback_t* reg; - int cntrl_msg = -1; - - assert(frag->base.des_dst == &frag->segment); - assert(frag->base.des_dst_cnt == 1); - assert(frag->base.des_src == NULL); - assert(frag->base.des_src_cnt == 0); - assert(frag->type == MCA_BTL_UDAPL_RECV); - assert(frag->triplet.virtual_address == - (DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval); - assert(frag->triplet.segment_length == frag->size); - assert(frag->btl == btl); - - /* setup frag ftr location and do callback */ - frag->segment.base.seg_len = dto->transfered_length - - sizeof(mca_btl_udapl_footer_t); - frag->ftr = (mca_btl_udapl_footer_t *) - ((char *)frag->segment.base.seg_addr.pval + - frag->segment.base.seg_len); - - cntrl_msg = frag->ftr->tag; - - reg = mca_btl_base_active_message_trigger + frag->ftr->tag; - OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); - - reg->cbfunc(&btl->super, - frag->ftr->tag, &frag->base, reg->cbdata); - 
OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); - - /* Repost the frag */ - frag->ftr = frag->segment.base.seg_addr.pval; - frag->segment.base.seg_len = - (frag->size - sizeof(mca_btl_udapl_footer_t) - - sizeof(mca_btl_udapl_rdma_footer_t)); - frag->base.des_flags = 0; - - if(frag->size == - mca_btl_udapl_component.udapl_eager_frag_size) { - - OPAL_THREAD_ADD32(&(frag->endpoint->endpoint_sr_credits[BTL_UDAPL_EAGER_CONNECTION]), 1); - - dat_ep_post_recv(frag->endpoint->endpoint_eager, - 1, &frag->triplet, dto->user_cookie, - DAT_COMPLETION_DEFAULT_FLAG); - - if (frag->endpoint->endpoint_sr_credits[BTL_UDAPL_EAGER_CONNECTION] >= - mca_btl_udapl_component.udapl_sr_win) { - mca_btl_udapl_endpoint_send_sr_credits(frag->endpoint, - BTL_UDAPL_EAGER_CONNECTION); - } - - if (MCA_BTL_TAG_UDAPL == cntrl_msg) { - mca_btl_udapl_frag_progress_pending(btl, - frag->endpoint, - BTL_UDAPL_EAGER_CONNECTION); - } - - } else { - assert(frag->size == - mca_btl_udapl_component.udapl_max_frag_size); - - OPAL_THREAD_ADD32(&(frag->endpoint->endpoint_sr_credits[BTL_UDAPL_MAX_CONNECTION]), 1); - - dat_ep_post_recv(frag->endpoint->endpoint_max, - 1, &frag->triplet, dto->user_cookie, - DAT_COMPLETION_DEFAULT_FLAG); - - if (frag->endpoint->endpoint_sr_credits[BTL_UDAPL_MAX_CONNECTION] >= - mca_btl_udapl_component.udapl_sr_win) { - mca_btl_udapl_endpoint_send_sr_credits(frag->endpoint, - BTL_UDAPL_MAX_CONNECTION); - } - - if (MCA_BTL_TAG_UDAPL == cntrl_msg) { - mca_btl_udapl_frag_progress_pending(btl, - frag->endpoint, - BTL_UDAPL_MAX_CONNECTION); - } - } - - break; - } - case MCA_BTL_UDAPL_PUT: - { - assert(frag->base.des_src == &frag->segment); - assert(frag->base.des_src_cnt == 1); - assert(frag->base.des_dst_cnt == 1); - assert(frag->type == MCA_BTL_UDAPL_PUT); - - frag->base.des_cbfunc(&btl->super, endpoint, - &frag->base, OMPI_SUCCESS); - if( btl_ownership ) { - mca_btl_udapl_free(&btl->super, - &frag->base); - } - - 
OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION]), 1); - OPAL_THREAD_ADD32(&(endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION]), 1); - - mca_btl_udapl_frag_progress_pending(btl, - endpoint, BTL_UDAPL_MAX_CONNECTION); - - break; - } - case MCA_BTL_UDAPL_CONN_RECV: - mca_btl_udapl_endpoint_finish_connect(btl, - frag->segment.base.seg_addr.pval, - (int32_t *)((char *)frag->segment.base.seg_addr.pval + - sizeof(mca_btl_udapl_addr_t)), - event.event_data.connect_event_data.ep_handle); - /* No break - fall through to free */ - case MCA_BTL_UDAPL_CONN_SEND: - frag->segment.base.seg_len = - mca_btl_udapl_module.super.btl_eager_limit; - mca_btl_udapl_free(&btl->super, &frag->base); - break; - default: - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE, - ("WARNING: unknown frag type: %d\n", - frag->type)); - } - count++; - break; - default: - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE, - ("WARNING: DTO event: %s (%d)\n", - mca_btl_udapl_dat_event_to_string(event.event_number), - event.event_number)); - } - } - - /* Check connection EVD */ - while((btl->udapl_connect_inprogress > 0) && (DAT_SUCCESS == - dat_evd_dequeue(btl->udapl_evd_conn, &event))) { - - switch(event.event_number) { - case DAT_CONNECTION_REQUEST_EVENT: - /* Accept a new connection */ - mca_btl_udapl_accept_connect(btl, - event.event_data.cr_arrival_event_data.cr_handle); - count++; - break; - case DAT_CONNECTION_EVENT_ESTABLISHED: - /* Both the client and server side of a connection generate - this event */ - if (mca_btl_udapl_component.udapl_conn_priv_data) { - /* private data is only valid at this point if this - * event is from a dat_ep_connect call, not an accept - */ - mca_btl_udapl_endpoint_pd_established_conn(btl, - event.event_data.connect_event_data.ep_handle); - } else { - /* explicitly exchange process data */ - mca_btl_udapl_sendrecv(btl, - event.event_data.connect_event_data.ep_handle); - } - count++; - break; - case DAT_CONNECTION_EVENT_PEER_REJECTED: - case 
DAT_CONNECTION_EVENT_NON_PEER_REJECTED: - case DAT_CONNECTION_EVENT_ACCEPT_COMPLETION_ERROR: - case DAT_CONNECTION_EVENT_DISCONNECTED: - case DAT_CONNECTION_EVENT_BROKEN: - case DAT_CONNECTION_EVENT_TIMED_OUT: - /* handle this case specially? if we have finite timeout, - we might want to try connecting again here. */ - case DAT_CONNECTION_EVENT_UNREACHABLE: - /* Need to set the BTL endpoint to MCA_BTL_UDAPL_FAILED - See dat_ep_connect documentation pdf pg 198 */ - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, - ("WARNING: connection event not handled : %s (%d)\n", - mca_btl_udapl_dat_event_to_string(event.event_number), - event.event_number)); - break; - default: - BTL_ERROR(("ERROR: connection event : %s (%d)", - mca_btl_udapl_dat_event_to_string(event.event_number), - event.event_number)); - } - } - - /* Check async EVD */ - if (btl->udapl_async_events == mca_btl_udapl_component.udapl_async_events) { - btl->udapl_async_events = 0; - - while(DAT_SUCCESS == - dat_evd_dequeue(btl->udapl_evd_async, &event)) { - - switch(event.event_number) { - case DAT_ASYNC_ERROR_EVD_OVERFLOW: - case DAT_ASYNC_ERROR_IA_CATASTROPHIC: - case DAT_ASYNC_ERROR_EP_BROKEN: - case DAT_ASYNC_ERROR_TIMED_OUT: - case DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR: - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, - ("WARNING: async event ignored : %s (%d)", - mca_btl_udapl_dat_event_to_string(event.event_number), - event.event_number)); - break; - default: - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, - ("WARNING: %s (%d)\n", - mca_btl_udapl_dat_event_to_string(event.event_number), - event.event_number)); - } - } - } else { - btl->udapl_async_events++; - } - - /* - * Check eager rdma segments - */ - - /* find the number of endpoints with rdma buffers */ - rdma_ep_count = btl->udapl_eager_rdma_endpoint_count; - - for (j = 0; j < rdma_ep_count; j++) { - mca_btl_udapl_endpoint_t* endpoint; - mca_btl_udapl_frag_t *local_rdma_frag; - - endpoint = - opal_pointer_array_get_item(btl->udapl_eager_rdma_endpoints, j); - 
- OPAL_THREAD_LOCK(&endpoint->endpoint_eager_rdma_local.lock); - - local_rdma_frag = - MCA_BTL_UDAPL_GET_LOCAL_RDMA_FRAG(endpoint, - endpoint->endpoint_eager_rdma_local.head); - - if (local_rdma_frag->rdma_ftr->active == 1) { - int pad = 0; - mca_btl_active_message_callback_t* reg; - - MCA_BTL_UDAPL_RDMA_NEXT_INDEX(endpoint->endpoint_eager_rdma_local.head); - OPAL_THREAD_UNLOCK(&endpoint->endpoint_eager_rdma_local.lock); - - /* compute pad as needed */ - MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad, - (local_rdma_frag->rdma_ftr->size + - sizeof(mca_btl_udapl_footer_t))); - - /* set fragment information */ - local_rdma_frag->ftr = (mca_btl_udapl_footer_t *) - ((char *)local_rdma_frag->rdma_ftr - - pad - - sizeof(mca_btl_udapl_footer_t)); - local_rdma_frag->segment.base.seg_len = - local_rdma_frag->rdma_ftr->size; - local_rdma_frag->segment.base.seg_addr.pval = (unsigned char *) - ((char *)local_rdma_frag->ftr - - local_rdma_frag->segment.base.seg_len); - - /* trigger callback */ - reg = mca_btl_base_active_message_trigger + local_rdma_frag->ftr->tag; - reg->cbfunc(&btl->super, - local_rdma_frag->ftr->tag, &local_rdma_frag->base, reg->cbdata); - - /* repost */ - local_rdma_frag->rdma_ftr->active = 0; - local_rdma_frag->segment.base.seg_len = - mca_btl_udapl_module.super.btl_eager_limit; - local_rdma_frag->base.des_flags = 0; - - /* increment local rdma credits */ - OPAL_THREAD_ADD32(&(endpoint->endpoint_eager_rdma_local.credits), - 1); - - if (endpoint->endpoint_eager_rdma_local.credits >= - mca_btl_udapl_component.udapl_eager_rdma_win) { - mca_btl_udapl_endpoint_send_eager_rdma_credits(endpoint); - } - - count++; - - } else { - OPAL_THREAD_UNLOCK(&endpoint->endpoint_eager_rdma_local.lock); - } - } /* end of rdma_count loop */ - } - - /* unlock and return */ - OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); - OPAL_THREAD_ADD32(&inprogress, -1); - return count; -} - diff --git a/ompi/mca/btl/udapl/btl_udapl_eager_rdma.h 
b/ompi/mca/btl/udapl/btl_udapl_eager_rdma.h deleted file mode 100644 index f32bc2d68e..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_eager_rdma.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_UDAPL_EAGER_RDMA_H -#define MCA_BTL_UDAPL_EAGER_RDMA_H - -/* Open MPI includes */ -#include "ompi/mca/btl/udapl/btl_udapl_endpoint.h" - - -BEGIN_C_DECLS - -/* - * Describe endpoint local memory region. - */ -struct mca_btl_udapl_eager_rdma_local_t { - ompi_ptr_t base; /**< points to fragment structures */ - struct mca_btl_udapl_reg_t* reg; - uint8_t head; /**< RDMA buffer to poll */ - int32_t credits; /**< number of local rdma buffers ready to be reclaimed, - reused. Initially equal to 0. */ - opal_mutex_t lock; /**< protect access to RDMA buffer */ -}; -typedef struct mca_btl_udapl_eager_rdma_local_t mca_btl_udapl_eager_rdma_local_t; - -/* - * Describe endpoint remote memory region. - */ -struct mca_btl_udapl_eager_rdma_remote_t { - ompi_ptr_t base; /**< points to start of data region, not - fragment structures */ - DAT_RMR_CONTEXT rkey; /**< key required to access remote memory */ - uint8_t head; /**< RDMA buffer to use */ - int32_t tokens; /**< number of available rdma buffers, initially equal - to mca parameter eager_rdma_num */ - opal_mutex_t lock; /**< protect access to RDMA buffer */ -}; -typedef struct mca_btl_udapl_eager_rdma_remote_t mca_btl_udapl_eager_rdma_remote_t; - -/* - * Encapsulate data that describes a remote memory region. - */ -struct mca_btl_udapl_eager_rdma_connect_t { - mca_btl_udapl_control_header_t control; - uint32_t rkey; - ompi_ptr_t rdma_start; -}; -typedef struct mca_btl_udapl_eager_rdma_connect_t mca_btl_udapl_eager_rdma_connect_t; - -/* - * Encapsulate data that describes rdma credit information. 
- */ -struct mca_btl_udapl_eager_rdma_credit_t { - mca_btl_udapl_control_header_t control; - uint32_t credits; -}; -typedef struct mca_btl_udapl_eager_rdma_credit_t mca_btl_udapl_eager_rdma_credit_t; - -#define EAGER_RDMA_BUFFER_AVAILABLE (0) -#define EAGER_RDMA_BUFFER_IN_USE (0xff) - -#define MCA_BTL_UDAPL_RDMA_FRAG_IN_USE(F) do { \ - *(volatile uint8_t*) ((char*)(F) + \ - (mca_btl_udapl_component.udapl_eager_rdma_frag_size - \ - (sizeof(mca_btl_udapl_footer_t)))); \ - } while (0) - -#define MCA_BTL_UDAPL_RDMA_FRAG_ASSIGN_IN_USE(F) do { \ - *(volatile uint8_t*) ((char*)(F) + \ - (mca_btl_udapl_component.udapl_eager_rdma_frag_size- \ - (sizeof(mca_btl_udapl_footer_t)))) = EAGER_RDMA_BUFFER_IN_USE; \ - } while (0) - -#define MCA_BTL_UDAPL_RDMA_FRAG_ASSIGN_AVAILABLE(F) do { \ - *(volatile uint8_t*) ((char*)(F) + \ - (mca_btl_udapl_component.udapl_eager_rdma_frag_size - \ - (sizeof(mca_btl_udapl_footer_t)))) = EAGER_RDMA_BUFFER_AVAILABLE; \ - } while (0) - -/* Retrieve the rdma fragment at location I */ -#define MCA_BTL_UDAPL_GET_LOCAL_RDMA_FRAG(E, I) \ - (mca_btl_udapl_frag_t*) \ - ((char*)(E)->endpoint_eager_rdma_local.base.pval + \ - (I) * sizeof(mca_btl_udapl_frag_eager_rdma_t)) - -/* - * Increment the index I by one while not exceeding the total number of - * available eager rdma fragments - */ -#define MCA_BTL_UDAPL_RDMA_NEXT_INDEX(I) do { \ - (I) = ((I) + 1); \ - if((I) == \ - mca_btl_udapl_component.udapl_eager_rdma_num) \ - (I) = 0; \ - } while (0) - -END_C_DECLS -#endif diff --git a/ompi/mca/btl/udapl/btl_udapl_endpoint.c b/ompi/mca/btl/udapl/btl_udapl_endpoint.c deleted file mode 100644 index 89fbe4d8a4..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_endpoint.c +++ /dev/null @@ -1,1708 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Sandia National Laboratories. All rights - * reserved. - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include -#include -#include "ompi/types.h" -#include "opal/align.h" - -#include "opal/dss/dss.h" -#include "opal/class/opal_pointer_array.h" - -#include "ompi/class/ompi_free_list.h" -#include "ompi/mca/mpool/grdma/mpool_grdma.h" -#include "ompi/mca/rte/rte.h" - -#include "ompi/mca/btl/base/btl_base_error.h" -#include "btl_udapl.h" -#include "btl_udapl_endpoint.h" -#include "btl_udapl_frag.h" -#include "btl_udapl_mca.h" -#include "btl_udapl_proc.h" - -static void mca_btl_udapl_endpoint_send_cb( - int status, - ompi_process_name_t* endpoint, - opal_buffer_t* buffer, - ompi_rml_tag_t tag, - void* cbdata); -static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint); -static int mca_btl_udapl_endpoint_post_recv( - mca_btl_udapl_endpoint_t* endpoint, - size_t size); -void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint); -void mca_btl_udapl_endpoint_recv( - int status, - ompi_process_name_t* endpoint, - opal_buffer_t* buffer, - ompi_rml_tag_t tag, - void* cbdata); -static int mca_btl_udapl_endpoint_finish_eager(mca_btl_udapl_endpoint_t*); -static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t*); -static mca_btl_base_endpoint_t* mca_btl_udapl_find_endpoint_connection_match( - struct mca_btl_udapl_module_t* btl, - DAT_EP_HANDLE ep); -static int mca_btl_udapl_endpoint_pd_finish_eager( - mca_btl_udapl_endpoint_t* 
endpoint); -static int mca_btl_udapl_endpoint_pd_finish_max( - mca_btl_udapl_endpoint_t* endpoint); -static int mca_btl_udapl_endpoint_pd_connections_completed( - mca_btl_udapl_endpoint_t* endpoint); -static void mca_btl_udapl_endpoint_connect_eager_rdma( - mca_btl_udapl_endpoint_t* endpoint); -static int mca_btl_udapl_endpoint_write_eager( - mca_btl_base_endpoint_t* endpoint, - mca_btl_udapl_frag_t* frag); -static void -mca_btl_udapl_endpoint_control_send_cb(mca_btl_base_module_t* btl, - mca_btl_base_endpoint_t* endpoint, - mca_btl_base_descriptor_t* descriptor, - int status); -static int mca_btl_udapl_endpoint_send_eager_rdma( - mca_btl_base_endpoint_t* endpoint); -extern void mca_btl_udapl_frag_progress_pending( - mca_btl_udapl_module_t* udapl_btl, - mca_btl_base_endpoint_t* endpoint, - const int connection); - - -/* - * Write a fragment - * - * @param endpoint (IN) BTL addressing information - * @param frag (IN) Fragment to be transferred - * - * @return OMPI_SUCCESS or OMPI_ERROR - */ -int mca_btl_udapl_endpoint_write_eager(mca_btl_base_endpoint_t* endpoint, - mca_btl_udapl_frag_t* frag) -{ - DAT_DTO_COOKIE cookie; - char* remote_buf; - DAT_RMR_TRIPLET remote_buffer; - int rc = OMPI_SUCCESS; - int pad = 0; - uint8_t head = endpoint->endpoint_eager_rdma_remote.head; - size_t size_plus_align = OPAL_ALIGN( - mca_btl_udapl_component.udapl_eager_frag_size, - DAT_OPTIMAL_ALIGNMENT, - size_t); - - /* now that we have the head update it */ - MCA_BTL_UDAPL_RDMA_NEXT_INDEX(endpoint->endpoint_eager_rdma_remote.head); - - MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad, - (frag->segment.base.seg_len + sizeof(mca_btl_udapl_footer_t))); - - /* set the rdma footer information */ - frag->rdma_ftr = (mca_btl_udapl_rdma_footer_t *) - ((char *)frag->segment.base.seg_addr.pval + - frag->segment.base.seg_len + - sizeof(mca_btl_udapl_footer_t) + - pad); - frag->rdma_ftr->active = 1; - frag->rdma_ftr->size = frag->segment.base.seg_len; /* this is size PML wants; - * will have to calc - * 
alignment - * at the other end - */ - - /* prep the fragment to be written out */ - frag->type = MCA_BTL_UDAPL_RDMA_WRITE; - frag->triplet.segment_length = frag->segment.base.seg_len + - sizeof(mca_btl_udapl_footer_t) + - pad + - sizeof(mca_btl_udapl_rdma_footer_t); - - /* set remote_buf to start of the remote write location; - * compute by first finding the end of the entire fragment - * and then working way back - */ - remote_buf = (char *)(endpoint->endpoint_eager_rdma_remote.base.pval) + - (head * size_plus_align) + - frag->size - - frag->triplet.segment_length; - - /* execute transfer with one contiguous write */ - - /* establish remote memory region */ - remote_buffer.rmr_context = - (DAT_RMR_CONTEXT)endpoint->endpoint_eager_rdma_remote.rkey; - remote_buffer.target_address = (DAT_VADDR)(uintptr_t)remote_buf; - remote_buffer.segment_length = frag->triplet.segment_length; - - /* write the data out */ - cookie.as_ptr = frag; - rc = dat_ep_post_rdma_write(endpoint->endpoint_eager, - 1, - &(frag->triplet), - cookie, - &remote_buffer, - DAT_COMPLETION_DEFAULT_FLAG); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_rdma_write", - major, minor)); - return OMPI_ERROR; - } - - return rc; -} - - -int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint, - mca_btl_udapl_frag_t* frag) -{ - int rc = OMPI_SUCCESS; - DAT_RETURN dat_rc; - DAT_DTO_COOKIE cookie; - bool call_progress = false; - - /* Fix up the segment length before we do anything with the frag */ - frag->triplet.segment_length = - frag->segment.base.seg_len + sizeof(mca_btl_udapl_footer_t); - - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - switch(endpoint->endpoint_state) { - case MCA_BTL_UDAPL_CONNECTED: - /* just send it already.. 
*/ - if(frag->size == - mca_btl_udapl_component.udapl_eager_frag_size) { - - if (OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_EAGER_CONNECTION], -1) < 0) { - /* no local work queue tokens available */ - OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_EAGER_CONNECTION], 1); - opal_list_append(&endpoint->endpoint_eager_frags, - (opal_list_item_t*)frag); - call_progress = true; - - } else { - /* work queue tokens available, try to write */ - if(OPAL_THREAD_ADD32(&endpoint->endpoint_eager_rdma_remote.tokens, -1) < 0) { - /* no rdma segment available so either send or queue */ - OPAL_THREAD_ADD32(&endpoint->endpoint_eager_rdma_remote.tokens, 1); - - if(OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_EAGER_CONNECTION], -1) < 0) { - /* no sr tokens available, put on queue */ - OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_EAGER_CONNECTION], 1); - OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_EAGER_CONNECTION], 1); - opal_list_append(&endpoint->endpoint_eager_frags, - (opal_list_item_t*)frag); - call_progress = true; - - } else { - /* sr tokens available, send eager size frag */ - cookie.as_ptr = frag; - dat_rc = dat_ep_post_send(endpoint->endpoint_eager, 1, - &frag->triplet, cookie, - DAT_COMPLETION_DEFAULT_FLAG); - - if(DAT_SUCCESS != dat_rc) { - char* major; - char* minor; - - dat_strerror(dat_rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_send", - major, minor)); - endpoint->endpoint_state = MCA_BTL_UDAPL_FAILED; - rc = OMPI_ERROR; - } - } - - } else { - rc = mca_btl_udapl_endpoint_write_eager(endpoint, frag); - } - } - - } else { - assert(frag->size == - mca_btl_udapl_component.udapl_max_frag_size); - - if (OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], -1) < 0) { - - /* no local work queue tokens available, put on queue */ - OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], 1); - 
opal_list_append(&endpoint->endpoint_max_frags, - (opal_list_item_t*)frag); - call_progress = true; - - } else { - /* work queue tokens available, try to send */ - if(OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], -1) < 0) { - /* no sr tokens available, put on queue */ - OPAL_THREAD_ADD32(&endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION], 1); - OPAL_THREAD_ADD32(&endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION], 1); - opal_list_append(&endpoint->endpoint_max_frags, - (opal_list_item_t*)frag); - call_progress = true; - - } else { - /* sr tokens available, send max size frag */ - cookie.as_ptr = frag; - dat_rc = dat_ep_post_send(endpoint->endpoint_max, 1, - &frag->triplet, cookie, - DAT_COMPLETION_DEFAULT_FLAG); - - if(DAT_SUCCESS != dat_rc) { - char* major; - char* minor; - - dat_strerror(dat_rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_send", - major, minor)); - rc = OMPI_ERROR; - } - } - } - } - - break; - case MCA_BTL_UDAPL_CLOSED: - /* Initiate a new connection, add this send to a queue */ - rc = mca_btl_udapl_start_connect(endpoint); - if(OMPI_SUCCESS != rc) { - endpoint->endpoint_state = MCA_BTL_UDAPL_FAILED; - break; - } - - /* Fall through on purpose to queue the send */ - case MCA_BTL_UDAPL_CONN_EAGER: - case MCA_BTL_UDAPL_CONN_MAX: - /* Add this send to a queue */ - if(frag->size == - mca_btl_udapl_component.udapl_eager_frag_size) { - opal_list_append(&endpoint->endpoint_eager_frags, - (opal_list_item_t*)frag); - } else { - assert(frag->size == - mca_btl_udapl_component.udapl_max_frag_size); - opal_list_append(&endpoint->endpoint_max_frags, - (opal_list_item_t*)frag); - } - - break; - case MCA_BTL_UDAPL_FAILED: - rc = OMPI_ERR_UNREACH; - break; - } - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - - if(call_progress) opal_progress(); - - return rc; -} - - -static void mca_btl_udapl_endpoint_send_cb(int status, ompi_process_name_t* endpoint, - opal_buffer_t* 
buffer, ompi_rml_tag_t tag, void* cbdata) -{ - OBJ_RELEASE(buffer); -} - - -/* - * Set uDAPL endpoint parameters as required in ep_param. Accomplished - * by retrieving the default set of parameters from temporary (dummy) - * endpoint and then setting any other parameters as required by - * this BTL. - * - * @param btl (IN) BTL module - * @param ep_param (IN/OUT)Pointer to a valid endpoint parameter location - * - * @return OMPI_SUCCESS or error status on failure - */ -int mca_btl_udapl_endpoint_get_params(mca_btl_udapl_module_t* btl, - DAT_EP_PARAM* ep_param) -{ - int rc = OMPI_SUCCESS; - int request_dtos; - int max_control_messages; - DAT_EP_HANDLE dummy_ep; - DAT_EP_ATTR* ep_attr = &((*ep_param).ep_attr); - - /* open dummy endpoint, used to find default endpoint parameters */ - rc = dat_ep_create(btl->udapl_ia, - btl->udapl_pz, - btl->udapl_evd_dto, - btl->udapl_evd_dto, - btl->udapl_evd_conn, - NULL, - &dummy_ep); - if (DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_create", - major, minor)); - /* this could be recoverable, by just using defaults */ - ep_attr = NULL; - return OMPI_ERROR; - } - - rc = dat_ep_query(dummy_ep, - DAT_EP_FIELD_ALL, - ep_param); - if (DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_query", - major, minor)); - - /* this could be recoverable, by just using defaults */ - ep_attr = NULL; - return OMPI_ERROR; - } - - /* Set max_recv_dtos : - * The max_recv_dtos should be equal to the number of - * outstanding posted receives, which for this BTL will - * be mca_btl_udapl_component.udapl_num_recvs. 
- */ - if (btl->udapl_max_recv_dtos < - mca_btl_udapl_component.udapl_num_recvs) { - - if (MCA_BTL_UDAPL_MAX_RECV_DTOS_DEFAULT != - btl->udapl_max_recv_dtos) { - - /* user modified, this will fail and is not acceptable */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "max_recv_dtos too low", - true, - btl->udapl_max_recv_dtos, - mca_btl_udapl_component.udapl_num_recvs)); - - btl->udapl_max_recv_dtos = - mca_btl_udapl_component.udapl_num_recvs; - } - - if (MCA_BTL_UDAPL_NUM_RECVS_DEFAULT != - mca_btl_udapl_component.udapl_num_recvs) { - - /* user modified udapl_num_recvs so adjust max_recv_dtos */ - btl->udapl_max_recv_dtos = - mca_btl_udapl_component.udapl_num_recvs; - } - } - - (*ep_attr).max_recv_dtos = btl->udapl_max_recv_dtos; - - /* Set max_request_dtos : - * The max_request_dtos should equal the max number of - * outstanding sends plus RDMA operations. - * - * Note: Using the same value for both EAGER and MAX - * connections even though the MAX connection does not - * have the extra RDMA operations that the EAGER - * connection does. 
- */ - max_control_messages = - (mca_btl_udapl_component.udapl_num_recvs / - mca_btl_udapl_component.udapl_sr_win) + 1 + - (mca_btl_udapl_component.udapl_eager_rdma_num / - mca_btl_udapl_component.udapl_eager_rdma_win) + 1; - request_dtos = mca_btl_udapl_component.udapl_num_sends + - (2*mca_btl_udapl_component.udapl_eager_rdma_num) + - max_control_messages; - - if (btl->udapl_max_request_dtos < request_dtos) { - if (MCA_BTL_UDAPL_MAX_REQUEST_DTOS_DEFAULT != - mca_btl_udapl_module.udapl_max_request_dtos) { - - /* user has modified */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", - "max_request_dtos too low", - true, - btl->udapl_max_request_dtos, request_dtos)); - } else { - btl->udapl_max_request_dtos = - mca_btl_udapl_module.udapl_max_request_dtos = request_dtos; - } - } - - if (btl->udapl_max_request_dtos > btl->udapl_ia_attr.max_dto_per_ep) { - /* do not go beyond what is allowed by the system */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "max_request_dtos system max", - true, - btl->udapl_max_request_dtos, - btl->udapl_ia_attr.max_dto_per_ep)); - btl->udapl_max_request_dtos = btl->udapl_ia_attr.max_dto_per_ep; - } - - (*ep_attr).max_request_dtos = btl->udapl_max_request_dtos; - - /* close the dummy endpoint */ - rc = dat_ep_free(dummy_ep); - if (DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("WARNING: %s %s %s\n", "dat_ep_free", - major, minor)); - /* this could be recoverable, by just using defaults */ - } - - return rc; -} - -/* - * Create a uDAPL endpoint - * - * @param btl (IN) BTL module - * @param ep_endpoint (IN) uDAPL endpoint information - * - * @return OMPI_SUCCESS or error status on failure - */ -int mca_btl_udapl_endpoint_create(mca_btl_udapl_module_t* btl, - DAT_EP_HANDLE* udapl_endpoint) -{ - int rc = OMPI_SUCCESS; - - /* Create a new uDAPL endpoint and start the connection process */ - rc = 
dat_ep_create(btl->udapl_ia, btl->udapl_pz, - btl->udapl_evd_dto, btl->udapl_evd_dto, btl->udapl_evd_conn, - &(btl->udapl_ep_param.ep_attr), udapl_endpoint); - - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_create", - major, minor)); - dat_ep_free(udapl_endpoint); - udapl_endpoint = DAT_HANDLE_NULL; - } - - return rc; -} - - -static int mca_btl_udapl_start_connect(mca_btl_base_endpoint_t* endpoint) -{ - mca_btl_udapl_addr_t* addr = &endpoint->endpoint_btl->udapl_addr; - opal_buffer_t* buf = OBJ_NEW(opal_buffer_t); - int rc; - - if(NULL == buf) { - OMPI_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), 1); - - /* Pack our address information */ - rc = opal_dss.pack(buf, &addr->port, 1, OPAL_UINT64); - if(OPAL_SUCCESS != rc) { - OMPI_ERROR_LOG(rc); - return rc; - } - - rc = opal_dss.pack(buf, &addr->addr, sizeof(DAT_SOCK_ADDR), OPAL_UINT8); - if(OPAL_SUCCESS != rc) { - OMPI_ERROR_LOG(rc); - return rc; - } - - /* Send the buffer */ - rc = ompi_rte_send_buffer_nb(&endpoint->endpoint_proc->proc_ompi->proc_name, buf, - OMPI_RML_TAG_UDAPL, 0, mca_btl_udapl_endpoint_send_cb, NULL); - if(0 > rc) { - OMPI_ERROR_LOG(rc); - return rc; - } - - endpoint->endpoint_state = MCA_BTL_UDAPL_CONN_EAGER; - return OMPI_SUCCESS; -} - - -void mca_btl_udapl_endpoint_recv(int status, ompi_process_name_t* endpoint, - opal_buffer_t* buffer, ompi_rml_tag_t tag, void* cbdata) -{ - mca_btl_udapl_addr_t addr; - mca_btl_udapl_proc_t* proc; - mca_btl_base_endpoint_t* ep; - int32_t cnt = 1; - size_t i; - int rc; - - /* Unpack data */ - rc = opal_dss.unpack(buffer, &addr.port, &cnt, OPAL_UINT64); - if(OPAL_SUCCESS != rc) { - OMPI_ERROR_LOG(rc); - return; - } - - cnt = sizeof(mca_btl_udapl_addr_t); - rc = opal_dss.unpack(buffer, &addr.addr, &cnt, OPAL_UINT8); - if(OPAL_SUCCESS != rc) { 
- OMPI_ERROR_LOG(rc); - return; - } - - /* Match the endpoint and handle it */ - OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); - for(proc = (mca_btl_udapl_proc_t*) - opal_list_get_first(&mca_btl_udapl_component.udapl_procs); - proc != (mca_btl_udapl_proc_t*) - opal_list_get_end(&mca_btl_udapl_component.udapl_procs); - proc = (mca_btl_udapl_proc_t*)opal_list_get_next(proc)) { - - if(OPAL_EQUAL == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, &proc->proc_ompi->proc_name, endpoint)) { - for(i = 0; i < proc->proc_endpoint_count; i++) { - ep = proc->proc_endpoints[i]; - - /* Does this endpoint match? Only compare the address - * portion of mca_btl_udapl_addr_t. - */ - if(!memcmp(&addr, &ep->endpoint_addr, - (sizeof(DAT_CONN_QUAL) + sizeof(DAT_SOCK_ADDR)))) { - OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); - mca_btl_udapl_endpoint_connect(ep); - return; - } - } - } - } - OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); -} - - -/* - * Set up OOB recv callback. - */ - -void mca_btl_udapl_endpoint_post_oob_recv(void) -{ - ompi_rte_recv_buffer_nb(OMPI_NAME_WILDCARD, OMPI_RML_TAG_UDAPL, - ORTE_RML_PERSISTENT, mca_btl_udapl_endpoint_recv, NULL); -} - - -void mca_btl_udapl_endpoint_connect(mca_btl_udapl_endpoint_t* endpoint) -{ - mca_btl_udapl_module_t* btl = endpoint->endpoint_btl; - int rc; - char *priv_data_ptr = NULL; - DAT_COUNT priv_data_size = 0; - - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - OPAL_THREAD_ADD32(&(btl->udapl_connect_inprogress), 1); - - /* Nasty test to prevent deadlock and unwanted connection attempts */ - /* This right here is the whole point of using the ORTE/RML handshake */ - if((MCA_BTL_UDAPL_CONN_EAGER == endpoint->endpoint_state && - 0 > ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - &endpoint->endpoint_proc->proc_ompi->proc_name, - &ompi_proc_local()->proc_name)) || - (MCA_BTL_UDAPL_CLOSED != endpoint->endpoint_state && - MCA_BTL_UDAPL_CONN_EAGER != endpoint->endpoint_state)) { - 
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - return; - } - - /* Create a new uDAPL endpoint and start the connection process */ - rc = mca_btl_udapl_endpoint_create(btl, &endpoint->endpoint_eager); - if(DAT_SUCCESS != rc) { - BTL_ERROR(("mca_btl_udapl_endpoint_create")); - goto failure_create; - } - - /* create private data as required */ - if (mca_btl_udapl_component.udapl_conn_priv_data) { - int32_t priv_data_conn_type = BTL_UDAPL_EAGER_CONNECTION; - - priv_data_size = sizeof(mca_btl_udapl_addr_t) + sizeof(int32_t); - priv_data_ptr = (char *)malloc(priv_data_size); - - if (NULL == priv_data_ptr) { - BTL_ERROR(("ERROR: %s %s\n", "mca_btl_udapl_endpoint_connect", - "out of resources")); - goto failure_create; - } - - /* private data consists of local btl address, listen port (psp), - * and endpoint state to indicate EAGER or MAX endpoint - */ - memcpy(priv_data_ptr, &btl->udapl_addr, sizeof(mca_btl_udapl_addr_t)); - memcpy((priv_data_ptr + sizeof(mca_btl_udapl_addr_t)), - &priv_data_conn_type, sizeof(int32_t)); - } - - rc = dat_ep_connect(endpoint->endpoint_eager, &endpoint->endpoint_addr.addr, - endpoint->endpoint_addr.port, mca_btl_udapl_component.udapl_timeout, - priv_data_size, priv_data_ptr, 0, DAT_CONNECT_DEFAULT_FLAG); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_connect", - major, minor)); - goto failure; - } - - endpoint->endpoint_state = MCA_BTL_UDAPL_CONN_EAGER; - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - - if (mca_btl_udapl_component.udapl_conn_priv_data) { - free(priv_data_ptr); - } - - return; - -failure: - dat_ep_free(endpoint->endpoint_eager); -failure_create: - endpoint->endpoint_eager = DAT_HANDLE_NULL; - endpoint->endpoint_state = MCA_BTL_UDAPL_FAILED; - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - return; -} - - -/* - * Finish establishing a connection - * Note that this routine expects that the 
mca_btl_udapl_component.udapl.lock - * has been acquired by the callee. - */ - -int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl, - mca_btl_udapl_addr_t* addr, - int32_t* connection_seq, - DAT_EP_HANDLE endpoint) -{ - mca_btl_base_endpoint_t* ep; - int rc = OMPI_SUCCESS; - - /* find the endpoint which matches the address in data received */ - ep = mca_btl_udapl_find_endpoint_address_match(btl, *addr); - - if (ep == NULL) { - /* If this point is reached, no matching endpoint was found */ - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, - ("ERROR: could not match endpoint\n")); - return OMPI_ERROR; - } - - if(MCA_BTL_UDAPL_CONN_EAGER == ep->endpoint_state) { - ep->endpoint_connection_seq = (NULL != connection_seq) ? - *connection_seq:0; - ep->endpoint_eager = endpoint; - rc = mca_btl_udapl_endpoint_finish_eager(ep); - } else if(MCA_BTL_UDAPL_CONN_MAX == ep->endpoint_state) { - /* Check to see order of messages received are in - * the same order the actual connections are made. - * If they are not we need to swap the eager and - * max connections. This inversion is possible due - * to a race condition that one process may actually - * receive the sendrecv messages from the max connection - * before the eager connection. 
- */ - if (NULL == connection_seq || - ep->endpoint_connection_seq < *connection_seq) { - /* normal order connection matching */ - ep->endpoint_max = endpoint; - } else { - /* inverted order connection matching */ - ep->endpoint_max = ep->endpoint_eager; - ep->endpoint_eager = endpoint; - } - - rc = mca_btl_udapl_endpoint_finish_max(ep); - } else { - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_DIAGNOSE, - ("ERROR: invalid EP state %d\n", - ep->endpoint_state)); - return OMPI_ERROR; - } - - return rc; -} - - -/* - * Finish setting up an eager connection, start a max connection - */ - -static int mca_btl_udapl_endpoint_finish_eager( - mca_btl_udapl_endpoint_t* endpoint) -{ - mca_btl_udapl_module_t* btl = endpoint->endpoint_btl; - int rc; - - endpoint->endpoint_state = MCA_BTL_UDAPL_CONN_MAX; - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - - /* establish eager rdma connection */ - if ((1 == mca_btl_udapl_component.udapl_use_eager_rdma) && - (btl->udapl_eager_rdma_endpoint_count < - mca_btl_udapl_component.udapl_max_eager_rdma_peers)) { - mca_btl_udapl_endpoint_connect_eager_rdma(endpoint); - } - - /* Only one side does dat_ep_connect() */ - if(0 < ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - &endpoint->endpoint_proc->proc_ompi->proc_name, - &ompi_proc_local()->proc_name)) { - - rc = mca_btl_udapl_endpoint_create(btl, &endpoint->endpoint_max); - if(DAT_SUCCESS != rc) { - endpoint->endpoint_state = MCA_BTL_UDAPL_FAILED; - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - return OMPI_ERROR; - } - - rc = dat_ep_connect(endpoint->endpoint_max, - &endpoint->endpoint_addr.addr, endpoint->endpoint_addr.port, - mca_btl_udapl_component.udapl_timeout, - 0, NULL, 0, - DAT_CONNECT_DEFAULT_FLAG); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_connect", - major, minor)); - dat_ep_free(endpoint->endpoint_max); - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - - 
-static int mca_btl_udapl_endpoint_finish_max(mca_btl_udapl_endpoint_t* endpoint) -{ - int ret = OMPI_SUCCESS; - mca_btl_udapl_module_t* udapl_btl = endpoint->endpoint_btl; - - endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTED; - OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), -1); - - /* post eager recv buffers */ - ret = mca_btl_udapl_endpoint_post_recv(endpoint, - mca_btl_udapl_component.udapl_eager_frag_size); - if (OMPI_SUCCESS != ret) { - return ret; - } - - /* post max recv buffers */ - ret = mca_btl_udapl_endpoint_post_recv(endpoint, - mca_btl_udapl_component.udapl_max_frag_size); - if (OMPI_SUCCESS != ret) { - return ret; - } - - /* progress eager frag queue as allowed */ - mca_btl_udapl_frag_progress_pending(udapl_btl, endpoint, - BTL_UDAPL_EAGER_CONNECTION); - - /* progress max frag queue as allowed */ - mca_btl_udapl_frag_progress_pending(udapl_btl, endpoint, - BTL_UDAPL_MAX_CONNECTION); - - return ret; -} - - -/* - * Utility routine. Search list of endpoints to find one that matches - * the given address. 
- * - * @param btl (IN) BTL module - * @param addr (IN) Address used to find endpoint to be returned - * - * @return Pointer to the base endpoint matching addr or NULL - */ -mca_btl_base_endpoint_t* -mca_btl_udapl_find_endpoint_address_match(struct mca_btl_udapl_module_t* btl, - mca_btl_udapl_addr_t addr) -{ - size_t i; - mca_btl_udapl_proc_t *proc; - mca_btl_base_endpoint_t *proc_ep; - mca_btl_base_endpoint_t *endpoint = NULL; - - for(proc = (mca_btl_udapl_proc_t*) - opal_list_get_first(&mca_btl_udapl_component.udapl_procs); - proc != (mca_btl_udapl_proc_t*) - opal_list_get_end(&mca_btl_udapl_component.udapl_procs); - proc = (mca_btl_udapl_proc_t*)opal_list_get_next(proc)) { - - for(i = 0; i < proc->proc_endpoint_count; i++) { - proc_ep = proc->proc_endpoints[i]; - - if(proc_ep->endpoint_btl == btl && - !memcmp(&addr, &proc_ep->endpoint_addr, - (sizeof(DAT_CONN_QUAL) + sizeof(DAT_SOCK_ADDR)))) { - - /* match found */ - endpoint = proc_ep; - return endpoint; - } - } - } - - return endpoint; -} - - -/* - * Utility routine. Search list of endpoints to find one that matches - * the given DAT endpoint handle, this could either be the eager or - * max ep. 
- * - * @param btl (IN) BTL module - * @param ep (IN) EP handle used to find endpoint to be returned - * - * @return Pointer to the base endpoint matching addr or NULL - */ -static mca_btl_base_endpoint_t* -mca_btl_udapl_find_endpoint_connection_match(struct mca_btl_udapl_module_t* btl, - DAT_EP_HANDLE ep) -{ - size_t i; - mca_btl_udapl_proc_t *proc; - mca_btl_base_endpoint_t *proc_ep; - mca_btl_base_endpoint_t *endpoint = NULL; - - for(proc = (mca_btl_udapl_proc_t*) - opal_list_get_first(&mca_btl_udapl_component.udapl_procs); - proc != (mca_btl_udapl_proc_t*) - opal_list_get_end(&mca_btl_udapl_component.udapl_procs); - proc = (mca_btl_udapl_proc_t*)opal_list_get_next(proc)) { - - for(i = 0; i < proc->proc_endpoint_count; i++) { - proc_ep = proc->proc_endpoints[i]; - - if(proc_ep->endpoint_btl == btl) { - if (ep == proc_ep->endpoint_eager || - ep == proc_ep->endpoint_max) { - /* match found */ - endpoint = proc_ep; - return endpoint; - } else { - continue; - } - } - } - } - - return endpoint; -} - - -/* - * Private Data connection establishment process. Operations to be - * performed once the eager connection of the given endpoint has - * completed. 
- * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * - * @return OMPI_SUCCESS or error status on failure - */ -static int mca_btl_udapl_endpoint_pd_finish_eager( - mca_btl_udapl_endpoint_t* endpoint) -{ - mca_btl_udapl_module_t* btl = endpoint->endpoint_btl; - int rc = OMPI_SUCCESS; - char *priv_data_ptr = NULL; - DAT_COUNT priv_data_size = 0; - - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - endpoint->endpoint_state = MCA_BTL_UDAPL_CONN_MAX; - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - - /* initiate the eager rdma connection */ - if ((1 == mca_btl_udapl_component.udapl_use_eager_rdma) && - (btl->udapl_eager_rdma_endpoint_count < - mca_btl_udapl_component.udapl_max_eager_rdma_peers)) { - mca_btl_udapl_endpoint_connect_eager_rdma(endpoint); - } - - /* Only one side does dat_ep_connect() and if by chance the - * connection is already established we don't need to bother - * with this. - */ - if((BTL_UDAPL_NUM_CONNECTION != endpoint->endpoint_connections_completed) - && (0 < ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, - &endpoint->endpoint_proc->proc_ompi->proc_name, - &ompi_proc_local()->proc_name))) { - - rc = mca_btl_udapl_endpoint_create(btl, &endpoint->endpoint_max); - if(DAT_SUCCESS != rc) { - endpoint->endpoint_state = MCA_BTL_UDAPL_FAILED; - return OMPI_ERROR; - } - - if (mca_btl_udapl_component.udapl_conn_priv_data) { - int32_t priv_data_conn_type = BTL_UDAPL_MAX_CONNECTION; - - priv_data_size = (sizeof(mca_btl_udapl_addr_t) + sizeof(int32_t)); - priv_data_ptr = (char *)malloc(priv_data_size); - - if (NULL == priv_data_ptr) { - BTL_ERROR(("ERROR: %s %s\n", - "mca_btl_udapl_endpoint_pd_finish_eager", - "out of resources")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* private data consists of local btl address, listen port (psp), - * and endpoint state to indicate EAGER or MAX endpoint - */ - memcpy(priv_data_ptr, &btl->udapl_addr, - sizeof(mca_btl_udapl_addr_t)); - memcpy((priv_data_ptr + 
sizeof(mca_btl_udapl_addr_t)), - &priv_data_conn_type, sizeof(int32_t)); - } - - rc = dat_ep_connect(endpoint->endpoint_max, - &endpoint->endpoint_addr.addr, endpoint->endpoint_addr.port, - mca_btl_udapl_component.udapl_timeout, - priv_data_size, priv_data_ptr, 0, - DAT_CONNECT_DEFAULT_FLAG); - - if (mca_btl_udapl_component.udapl_conn_priv_data) { - free(priv_data_ptr); - } - - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_connect", - major, minor)); - dat_ep_free(endpoint->endpoint_max); - return OMPI_ERROR; - } - } - - /* post eager recv buffers */ - rc = mca_btl_udapl_endpoint_post_recv(endpoint, - mca_btl_udapl_component.udapl_eager_frag_size); - if (OMPI_SUCCESS != rc) { - return rc; - } - - /* Not progressing here because the entire endpoint needs to be - * marked MCA_BTL_UDAPL_CONNECTED, otherwise - * mca_btl_udapl_endpoint_send() will just put queued sends back on - * the queue. - */ - - return OMPI_SUCCESS; -} - - -/* - * Private Data connection establishment process. Operations to be - * performed once the max connection of the given endpoint has - * completed. - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * - * @return OMPI_SUCCESS or error status on failure - */ -static int -mca_btl_udapl_endpoint_pd_finish_max(mca_btl_udapl_endpoint_t* endpoint) -{ - int rc = OMPI_SUCCESS; - - /* post max recv buffers */ - rc = mca_btl_udapl_endpoint_post_recv(endpoint, - mca_btl_udapl_component.udapl_max_frag_size); - - /* Not progressing here because the entire endpoint needs to be - * marked MCA_BTL_UDAPL_CONNECTED otherwise - * mca_btl_udapl_endpoint_send() will just put queued sends back on - * the queue. - */ - - return rc; -} - - -/* - * Private Data connection establishment process. Operations to be - * performed once both the eager and max max connections of the given - * endpoint has completed. 
- * - * @param endpoint (IN) BTL addressing information - * - * @return OMPI_SUCCESS or error status on failure */ -static int -mca_btl_udapl_endpoint_pd_connections_completed(mca_btl_udapl_endpoint_t* endpoint) -{ - int rc = OMPI_SUCCESS; - mca_btl_udapl_module_t* udapl_btl = endpoint->endpoint_btl; - - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - endpoint->endpoint_state = MCA_BTL_UDAPL_CONNECTED; - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - - OPAL_THREAD_ADD32(&(endpoint->endpoint_btl->udapl_connect_inprogress), -1); - - /* progress eager frag queue */ - mca_btl_udapl_frag_progress_pending(udapl_btl, endpoint, - BTL_UDAPL_EAGER_CONNECTION); - - /* progress max frag queue */ - mca_btl_udapl_frag_progress_pending(udapl_btl, endpoint, - BTL_UDAPL_MAX_CONNECTION); - - return rc; -} - - -/* - * Private Data connection establishment process. Called once the - * DAT_CONNECTION_EVENT_ESTABLISHED is dequeued from the connecton - * event dispatcher (evd). This event is the local completion event - * for both the dat_ep_connect and dat_cr_accpept calls. 
- * - * @param btl (IN) BTL module - * @param ep (IN) EP handle used to find endpoint to be returned - * - * @return Pointer to the base endpoint matching addr - */ -int -mca_btl_udapl_endpoint_pd_established_conn(struct mca_btl_udapl_module_t* btl, - DAT_EP_HANDLE established_ep) -{ - int rc = OMPI_SUCCESS; - mca_btl_base_endpoint_t* proc_ep = NULL; - - /* search for ep and decide what to do next */ - proc_ep = - mca_btl_udapl_find_endpoint_connection_match(btl, established_ep); - - if (proc_ep == NULL) { - /* If this point is reached, no matching endpoint was found */ - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, - ("ERROR: could not match endpoint\n")); - return OMPI_ERROR; - } - - proc_ep->endpoint_connections_completed++; - - if (established_ep == proc_ep->endpoint_eager) { - rc = mca_btl_udapl_endpoint_pd_finish_eager(proc_ep); - } else if (established_ep == proc_ep->endpoint_max) { - rc = mca_btl_udapl_endpoint_pd_finish_max(proc_ep); - } - - if (rc == OMPI_SUCCESS && BTL_UDAPL_NUM_CONNECTION == - proc_ep->endpoint_connections_completed) { - rc = mca_btl_udapl_endpoint_pd_connections_completed(proc_ep); - } - - return rc; -} - - -/* - * Post receive buffers for a newly established endpoint connection. 
- */ - -static int mca_btl_udapl_endpoint_post_recv(mca_btl_udapl_endpoint_t* endpoint, - size_t size) -{ - mca_btl_udapl_frag_t* frag = NULL; - DAT_DTO_COOKIE cookie; - DAT_EP_HANDLE ep; - int rc; - int i; - - for(i = 0; i < mca_btl_udapl_component.udapl_num_recvs; i++) { - if(size == mca_btl_udapl_component.udapl_eager_frag_size) { - MCA_BTL_UDAPL_FRAG_ALLOC_EAGER_RECV(endpoint->endpoint_btl, frag); - ep = endpoint->endpoint_eager; - } else { - assert(size == mca_btl_udapl_component.udapl_max_frag_size); - MCA_BTL_UDAPL_FRAG_ALLOC_MAX_RECV(endpoint->endpoint_btl, frag); - ep = endpoint->endpoint_max; - } - - if (NULL == frag) { - BTL_ERROR(("ERROR: %s posting recv, out of resources\n", - "MCA_BTL_UDAPL_ALLOC")); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - assert(size == frag->size); - /* Set up the LMR triplet from the frag segment */ - /* Note that this triplet defines a sub-region of a registered LMR */ - frag->triplet.virtual_address = - (DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval; - frag->triplet.segment_length = frag->size; - - frag->btl = endpoint->endpoint_btl; - frag->endpoint = endpoint; - frag->base.des_dst = &frag->segment.base; - frag->base.des_dst_cnt = 1; - frag->base.des_src = NULL; - frag->base.des_src_cnt = 0; - frag->base.des_flags = 0; - frag->type = MCA_BTL_UDAPL_RECV; - - cookie.as_ptr = frag; - - rc = dat_ep_post_recv(ep, 1, - &frag->triplet, cookie, DAT_COMPLETION_DEFAULT_FLAG); - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_recv", - major, minor)); - return OMPI_ERROR; - } - } - - return OMPI_SUCCESS; -} - - -/* - * Initialize state of the endpoint instance. 
- * - */ - -static void mca_btl_udapl_endpoint_construct(mca_btl_base_endpoint_t* endpoint) -{ - endpoint->endpoint_btl = 0; - endpoint->endpoint_proc = 0; - - endpoint->endpoint_connection_seq = 0; - endpoint->endpoint_connections_completed = 0;; - endpoint->endpoint_eager_sends = mca_btl_udapl_component.udapl_num_sends; - endpoint->endpoint_max_sends = mca_btl_udapl_component.udapl_num_sends; - - endpoint->endpoint_state = MCA_BTL_UDAPL_CLOSED; - endpoint->endpoint_eager = DAT_HANDLE_NULL; - endpoint->endpoint_max = DAT_HANDLE_NULL; - - endpoint->endpoint_sr_tokens[BTL_UDAPL_EAGER_CONNECTION] = - endpoint->endpoint_eager_sends; - endpoint->endpoint_sr_tokens[BTL_UDAPL_MAX_CONNECTION] = - endpoint->endpoint_max_sends; - endpoint->endpoint_sr_credits[BTL_UDAPL_EAGER_CONNECTION] = 0; - endpoint->endpoint_sr_credits[BTL_UDAPL_MAX_CONNECTION] = 0; - endpoint->endpoint_lwqe_tokens[BTL_UDAPL_EAGER_CONNECTION] = - mca_btl_udapl_component.udapl_num_sends + - (2*mca_btl_udapl_component.udapl_eager_rdma_num); - endpoint->endpoint_lwqe_tokens[BTL_UDAPL_MAX_CONNECTION] = - mca_btl_udapl_component.udapl_num_sends + - (2*mca_btl_udapl_component.udapl_eager_rdma_num); - - OBJ_CONSTRUCT(&endpoint->endpoint_eager_frags, opal_list_t); - OBJ_CONSTRUCT(&endpoint->endpoint_max_frags, opal_list_t); - OBJ_CONSTRUCT(&endpoint->endpoint_lock, opal_mutex_t); - - /* initialize eager RDMA */ - memset(&endpoint->endpoint_eager_rdma_local, 0, - sizeof(mca_btl_udapl_eager_rdma_local_t)); - memset (&endpoint->endpoint_eager_rdma_remote, 0, - sizeof(mca_btl_udapl_eager_rdma_remote_t)); - OBJ_CONSTRUCT(&endpoint->endpoint_eager_rdma_local.lock, opal_mutex_t); - OBJ_CONSTRUCT(&endpoint->endpoint_eager_rdma_remote.lock, opal_mutex_t); -} - -/* - * Destroy a endpoint - * - */ - -static void mca_btl_udapl_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) -{ - mca_btl_udapl_module_t* udapl_btl = endpoint->endpoint_btl; - mca_mpool_base_registration_t *reg = - 
(mca_mpool_base_registration_t*)endpoint->endpoint_eager_rdma_local.reg; - - OBJ_DESTRUCT(&endpoint->endpoint_eager_frags); - OBJ_DESTRUCT(&endpoint->endpoint_max_frags); - OBJ_DESTRUCT(&endpoint->endpoint_lock); - - /* release eager rdma resources */ - if (NULL != reg) { - udapl_btl->super.btl_mpool->mpool_free(udapl_btl->super.btl_mpool, - NULL, reg); - } - - if (NULL != endpoint->endpoint_eager_rdma_local.base.pval) { - free(endpoint->endpoint_eager_rdma_local.base.pval); - } -} - - -/* - * Release the fragment used to send the eager rdma control message. - * Callback to be executed upon receiving local completion event - * from sending a control message operation. Should essentially do - * the same thing as mca_btl_udapl_free(). - * - * @param btl (IN) BTL module - * @param endpoint (IN) BTL addressing information - * @param descriptor (IN) Description of the data to be transferred - * @param status (IN/OUT) - */ -static void mca_btl_udapl_endpoint_control_send_cb( - mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* endpoint, - struct mca_btl_base_descriptor_t* descriptor, - int status) -{ - int connection = BTL_UDAPL_EAGER_CONNECTION; - mca_btl_udapl_frag_t* frag = (mca_btl_udapl_frag_t*)descriptor; - - if(frag->size != mca_btl_udapl_component.udapl_eager_frag_size) { - connection = BTL_UDAPL_MAX_CONNECTION; - } - - /* control messages are not part of the regular accounting - * so here we subtract because the addition was made during - * the send completion during progress */ - OPAL_THREAD_ADD32(&(endpoint->endpoint_lwqe_tokens[connection]), -1); - - MCA_BTL_UDAPL_FRAG_RETURN_CONTROL(((mca_btl_udapl_module_t*)btl), - ((mca_btl_udapl_frag_t*)descriptor)); -} - -/* - * Allocate and initialize descriptor to be used in sending uDAPL BTL - * control messages. Should essentially accomplish same as would be - * from calling mca_btl_udapl_alloc(). 
- * - * @param btl (IN) BTL module - * @param size (IN) Size of segment required to be transferred - * - * @return descriptor (IN) Description of the data to be transferred - */ -static mca_btl_base_descriptor_t* mca_btl_udapl_endpoint_initialize_control_message( - struct mca_btl_base_module_t* btl, - size_t size) -{ - mca_btl_udapl_module_t* udapl_btl = (mca_btl_udapl_module_t*) btl; - mca_btl_udapl_frag_t* frag; - int pad = 0; - - /* compute pad as needed */ - MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(pad, - (size + sizeof(mca_btl_udapl_footer_t))); - - /* control messages size should never be greater than eager message size */ - assert((size+pad) <= btl->btl_eager_limit); - - MCA_BTL_UDAPL_FRAG_ALLOC_CONTROL(udapl_btl, frag); - - /* Set up the LMR triplet from the frag segment */ - frag->segment.base.seg_len = (uint32_t)size; - frag->triplet.virtual_address = - (DAT_VADDR)(uintptr_t)frag->segment.base.seg_addr.pval; - - /* assume send/recv as default when computing segment_length */ - frag->triplet.segment_length = - frag->segment.base.seg_len + sizeof(mca_btl_udapl_footer_t); - - assert(frag->triplet.lmr_context == - ((mca_btl_udapl_reg_t*)frag->registration)->lmr_triplet.lmr_context); - - frag->btl = udapl_btl; - frag->base.des_src = &frag->segment.base; - frag->base.des_src_cnt = 1; - frag->base.des_dst = NULL; - frag->base.des_dst_cnt = 0; - frag->base.des_flags = 0; - frag->base.des_cbfunc = mca_btl_udapl_endpoint_control_send_cb; - frag->base.des_cbdata = NULL; - - return &frag->base; -} - -/* - * Transfer the given endpoints rdma segment information. Expects that - * the endpoints rdma segment has previoulsy been created and - * registered as required. 
- * - * @param endpoint (IN) BTL addressing information - * - * @return OMPI_SUCCESS or error status on failure - */ -static int mca_btl_udapl_endpoint_send_eager_rdma( - mca_btl_base_endpoint_t* endpoint) -{ - mca_btl_udapl_eager_rdma_connect_t* rdma_connect; - mca_btl_base_descriptor_t* des; - mca_btl_udapl_segment_t* segment; - mca_btl_udapl_frag_t* data_frag; - mca_btl_udapl_frag_t* local_frag = (mca_btl_udapl_frag_t*)endpoint->endpoint_eager_rdma_local.base.pval; - mca_btl_udapl_module_t* udapl_btl = endpoint->endpoint_btl; - size_t cntrl_msg_size = sizeof(mca_btl_udapl_eager_rdma_connect_t); - int rc = OMPI_SUCCESS; - - des = mca_btl_udapl_endpoint_initialize_control_message( - &udapl_btl->super, cntrl_msg_size); - - des->des_flags = 0; - des->des_cbfunc = mca_btl_udapl_endpoint_control_send_cb; - des->des_cbdata = NULL; - - /* fill in data */ - segment = des->des_src; - rdma_connect = - (mca_btl_udapl_eager_rdma_connect_t*)segment->base.seg_addr.pval; - rdma_connect->control.type = - MCA_BTL_UDAPL_CONTROL_RDMA_CONNECT; - rdma_connect->rkey = - endpoint->endpoint_eager_rdma_local.reg->rmr_context; - rdma_connect->rdma_start.pval = - (unsigned char*)local_frag->base.super.ptr; - - /* prep fragment and put on queue */ - data_frag = (mca_btl_udapl_frag_t*)des; - data_frag->endpoint = endpoint; - data_frag->ftr = (mca_btl_udapl_footer_t *) - ((char *)data_frag->segment.base.seg_addr.pval + - data_frag->segment.base.seg_len); - data_frag->ftr->tag = MCA_BTL_TAG_UDAPL; - data_frag->type = MCA_BTL_UDAPL_SEND; - - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - opal_list_append(&endpoint->endpoint_eager_frags, - (opal_list_item_t*)data_frag); - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - - return rc; -} - -/* - * Endpoint handed in is the local process peer. This routine - * creates and initializes a local memory region which will be used for - * reading from locally. 
This memory region will be made available to peer - * for writing into by sending a description of the area to the given - * endpoint. - * - * Note: The local memory region is actually two areas, one is a - * contiguous memory region containing only the fragment structures. A - * pointer to the first fragment structure is held here: - * endpoint->endpoint_eager_rdma_local.base.pval. Each of these - * fragment structures will contain a pointer, - * frag->segment.base.seg_addr.pval set during a call to OBJ_CONSTRUCT(), - * to its associated data region. The data region for all fragments - * will be contiguous and created by accessing the mpool. - * - * @param endpoint (IN) BTL addressing information - */ -void mca_btl_udapl_endpoint_connect_eager_rdma( - mca_btl_udapl_endpoint_t* endpoint) -{ - char* buf; - char* alloc_ptr; - size_t size_plus_align; - int i; - uint32_t flags = MCA_MPOOL_FLAGS_CACHE_BYPASS; - mca_btl_udapl_module_t* udapl_btl = endpoint->endpoint_btl; - - OPAL_THREAD_LOCK(&endpoint->endpoint_eager_rdma_local.lock); - if (endpoint->endpoint_eager_rdma_local.base.pval) - goto unlock_rdma_local; - - if (mca_btl_udapl_component.udapl_eager_rdma_num <= 0) { - /* NOTE: Need to find a more generic way to check ranges - * for all mca parameters. 
- */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, ("help-mpi-btl-udapl.txt", - "invalid num rdma segments", - true, - mca_btl_udapl_component.udapl_eager_rdma_num)); - goto unlock_rdma_local; - } - - /* create space for fragment structures */ - alloc_ptr = (char*)malloc(mca_btl_udapl_component.udapl_eager_rdma_num * - sizeof(mca_btl_udapl_frag_eager_rdma_t)); - - if(NULL == alloc_ptr) { - goto unlock_rdma_local; - } - - /* get size of one fragment's data region */ - size_plus_align = OPAL_ALIGN( - mca_btl_udapl_component.udapl_eager_frag_size, - DAT_OPTIMAL_ALIGNMENT, size_t); - - /* set flags value accordingly if ro aware */ - if (mca_btl_udapl_component.ro_aware_system) { - flags |= MCA_MPOOL_FLAGS_SO_MEM; - } - - /* create and register memory for all rdma segments */ - buf = udapl_btl->super.btl_mpool->mpool_alloc(udapl_btl->super.btl_mpool, - (size_plus_align * mca_btl_udapl_component.udapl_eager_rdma_num), - 0, flags, - (mca_mpool_base_registration_t**)&endpoint->endpoint_eager_rdma_local.reg); - - if(!buf) - goto unlock_rdma_local; - - /* initialize the rdma segments */ - for(i = 0; i < mca_btl_udapl_component.udapl_eager_rdma_num; i++) { - mca_btl_udapl_frag_eager_rdma_t* local_rdma_frag; - ompi_free_list_item_t *item = (ompi_free_list_item_t *)(alloc_ptr + - i*sizeof(mca_btl_udapl_frag_eager_rdma_t)); - item->registration = (void*)endpoint->endpoint_eager_rdma_local.reg; - item->ptr = buf + i * size_plus_align; - OBJ_CONSTRUCT(item, mca_btl_udapl_frag_eager_rdma_t); - - local_rdma_frag = ((mca_btl_udapl_frag_eager_rdma_t*)item); - - local_rdma_frag->base.des_dst = &local_rdma_frag->segment.base; - local_rdma_frag->base.des_dst_cnt = 1; - local_rdma_frag->base.des_src = NULL; - local_rdma_frag->base.des_src_cnt = 0; - local_rdma_frag->btl = endpoint->endpoint_btl; - - - local_rdma_frag->endpoint = endpoint; - local_rdma_frag->type = MCA_BTL_UDAPL_FRAG_EAGER_RDMA; - local_rdma_frag->triplet.segment_length = local_rdma_frag->size; - } - - 
OPAL_THREAD_LOCK(&udapl_btl->udapl_eager_rdma_lock); - endpoint->endpoint_eager_rdma_index = - opal_pointer_array_add(udapl_btl->udapl_eager_rdma_endpoints, endpoint); - if( 0 > endpoint->endpoint_eager_rdma_index ) - goto cleanup; - - /* record first fragment location */ - endpoint->endpoint_eager_rdma_local.base.pval = alloc_ptr; - udapl_btl->udapl_eager_rdma_endpoint_count++; - - /* send the relevant data describing the registered space to the endpoint */ - if (mca_btl_udapl_endpoint_send_eager_rdma(endpoint) == 0) { - OPAL_THREAD_UNLOCK(&udapl_btl->udapl_eager_rdma_lock); - OPAL_THREAD_UNLOCK(&endpoint->endpoint_eager_rdma_local.lock); - return; - } - - udapl_btl->udapl_eager_rdma_endpoint_count--; - endpoint->endpoint_eager_rdma_local.base.pval = NULL; - opal_pointer_array_set_item(udapl_btl->udapl_eager_rdma_endpoints, - endpoint->endpoint_eager_rdma_index, NULL); - -cleanup: - /* this would fail if we hit the max and can not add anymore to the array - * and this could happen because we do not lock before checking if max has - * been reached - */ - free(alloc_ptr); - endpoint->endpoint_eager_rdma_local.base.pval = NULL; - OPAL_THREAD_UNLOCK(&udapl_btl->udapl_eager_rdma_lock); - udapl_btl->super.btl_mpool->mpool_free(udapl_btl->super.btl_mpool, - buf, - (mca_mpool_base_registration_t*)endpoint->endpoint_eager_rdma_local.reg); - - unlock_rdma_local: - OPAL_THREAD_UNLOCK(&endpoint->endpoint_eager_rdma_local.lock); - -} - -/* - * Send control message with the number of credits available on the - * endpoint. Update the credit value accordingly. 
- * - * @param endpoint (IN) BTL addressing information - * - * @return OMPI_SUCCESS or error status on failure - */ -int mca_btl_udapl_endpoint_send_eager_rdma_credits( - mca_btl_base_endpoint_t* endpoint) -{ - mca_btl_udapl_eager_rdma_credit_t *rdma_credit; - mca_btl_base_descriptor_t* des; - mca_btl_udapl_segment_t* segment; - DAT_DTO_COOKIE cookie; - mca_btl_udapl_frag_t* frag; - mca_btl_udapl_module_t* udapl_btl = endpoint->endpoint_btl; - size_t cntrl_msg_size = sizeof(mca_btl_udapl_eager_rdma_credit_t); - int rc = OMPI_SUCCESS; - - des = mca_btl_udapl_endpoint_initialize_control_message( - &udapl_btl->super, cntrl_msg_size); - - /* fill in data */ - segment = des->des_src; - rdma_credit = (mca_btl_udapl_eager_rdma_credit_t*)segment->base.seg_addr.pval; - rdma_credit->control.type = MCA_BTL_UDAPL_CONTROL_RDMA_CREDIT; - rdma_credit->credits = endpoint->endpoint_eager_rdma_local.credits; - - /* reset local credits value */ - OPAL_THREAD_LOCK(&endpoint->endpoint_eager_rdma_local.lock); - endpoint->endpoint_eager_rdma_local.credits -= rdma_credit->credits; - - /* prep and send fragment : control messages do not count - * against the token/credit number so do not subtract from tokens - * with this send - */ - frag = (mca_btl_udapl_frag_t*)des; - frag->endpoint = endpoint; - frag->ftr = (mca_btl_udapl_footer_t *) - ((char *)frag->segment.base.seg_addr.pval + frag->segment.base.seg_len); - frag->ftr->tag = MCA_BTL_TAG_UDAPL; - frag->type = MCA_BTL_UDAPL_SEND; - cookie.as_ptr = frag; - - rc = dat_ep_post_send(endpoint->endpoint_eager, 1, - &frag->triplet, cookie, - DAT_COMPLETION_DEFAULT_FLAG); - - OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_send", - major, minor)); - endpoint->endpoint_state = MCA_BTL_UDAPL_FAILED; - rc = OMPI_ERROR; - } - - return rc; -} - -/* - * Send control message with the 
number of credits available on the - * endpoint. Update the credit value accordingly. - * - * @param endpoint (IN) BTL addressing information - * - * @param connection (IN) 0 for eager and 1 for max connection - * - * @return OMPI_SUCCESS or error status on failure - */ -int mca_btl_udapl_endpoint_send_sr_credits( - mca_btl_base_endpoint_t* endpoint, const int connection) -{ - mca_btl_udapl_sr_credit_t *sr_credit; - mca_btl_base_descriptor_t* des; - mca_btl_udapl_segment_t* segment; - DAT_DTO_COOKIE cookie; - mca_btl_udapl_frag_t* frag; - mca_btl_udapl_module_t* udapl_btl = endpoint->endpoint_btl; - size_t cntrl_msg_size = sizeof(mca_btl_udapl_sr_credit_t); - int rc = OMPI_SUCCESS; - - des = mca_btl_udapl_endpoint_initialize_control_message( - &udapl_btl->super, cntrl_msg_size); - - /* fill in data */ - segment = des->des_src; - sr_credit = (mca_btl_udapl_sr_credit_t*)segment->base.seg_addr.pval; - sr_credit->control.type = MCA_BTL_UDAPL_CONTROL_SR_CREDIT; - OPAL_THREAD_LOCK(&endpoint->endpoint_lock); - sr_credit->credits = endpoint->endpoint_sr_credits[connection]; - sr_credit->connection = connection; - - /* reset local credits value */ - endpoint->endpoint_sr_credits[connection] = 0; - - /* prep and send fragment : control messages do not count - * against the token/credit count so do not subtract from tokens - * with this send - */ - frag = (mca_btl_udapl_frag_t*)des; - frag->endpoint = endpoint; - frag->ftr = (mca_btl_udapl_footer_t *) - ((char *)frag->segment.base.seg_addr.pval + frag->segment.base.seg_len); - frag->ftr->tag = MCA_BTL_TAG_UDAPL; - frag->type = MCA_BTL_UDAPL_SEND; - cookie.as_ptr = frag; - - if (BTL_UDAPL_EAGER_CONNECTION == connection) { - rc = dat_ep_post_send(endpoint->endpoint_eager, 1, - &frag->triplet, cookie, - DAT_COMPLETION_DEFAULT_FLAG); - - } else { - assert(BTL_UDAPL_MAX_CONNECTION == connection); - rc = dat_ep_post_send(endpoint->endpoint_max, 1, - &frag->triplet, cookie, - DAT_COMPLETION_DEFAULT_FLAG); - } - - 
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); - - if(DAT_SUCCESS != rc) { - char* major; - char* minor; - - dat_strerror(rc, (const char**)&major, - (const char**)&minor); - BTL_ERROR(("ERROR: %s %s %s\n", "dat_ep_post_send", - major, minor)); - endpoint->endpoint_state = MCA_BTL_UDAPL_FAILED; - rc = OMPI_ERROR; - } - - return rc; -} - - -OBJ_CLASS_INSTANCE( - mca_btl_udapl_endpoint_t, - opal_list_item_t, - mca_btl_udapl_endpoint_construct, - mca_btl_udapl_endpoint_destruct); - diff --git a/ompi/mca/btl/udapl/btl_udapl_endpoint.h b/ompi/mca/btl/udapl/btl_udapl_endpoint.h deleted file mode 100644 index 830cec877c..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_endpoint.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Sandia National Laboratories. All rights - * reserved. - * Copyright (c) 2006-2009 Sun Microsystems, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_UDAPL_ENDPOINT_H -#define MCA_BTL_UDAPL_ENDPOINT_H - -#include "opal/class/opal_list.h" -#include "opal/mca/event/event.h" -#include "ompi/mca/btl/base/btl_base_error.h" -#include "btl_udapl_frag.h" -#include "btl_udapl.h" -#include "btl_udapl_eager_rdma.h" - -BEGIN_C_DECLS - - -#define BTL_UDAPL_TOKEN_AVAIL(E, C, T) \ -do { \ - (T) = 0; \ - if ( (E)->endpoint_lwqe_tokens[(C)] > 0 && \ - ((E)->endpoint_sr_tokens[(C)] + \ - (((C) == BTL_UDAPL_EAGER_CONNECTION)?(E)->endpoint_eager_rdma_remote.tokens:0)) \ - ) { \ - (T) = 1; \ - } \ -} while (0) - -/** - * Structure used to publish uDAPL id information to peers. - */ -struct mca_btl_udapl_addr_t { - DAT_CONN_QUAL port; - DAT_SOCK_ADDR addr; - bool inuse; -}; -typedef struct mca_btl_udapl_addr_t mca_btl_udapl_addr_t; - - -/** - * State of uDAPL endpoint connection. - */ - -typedef enum { - MCA_BTL_UDAPL_CONN_EAGER, - MCA_BTL_UDAPL_CONN_MAX, - MCA_BTL_UDAPL_CONNECTED, - MCA_BTL_UDAPL_CLOSED, - MCA_BTL_UDAPL_FAILED -} mca_btl_udapl_endpoint_state_t; - -/* - * Establish a name for the 2 connections opened per peer - */ -typedef enum { - BTL_UDAPL_EAGER_CONNECTION, - BTL_UDAPL_MAX_CONNECTION, - BTL_UDAPL_NUM_CONNECTION -} mca_btl_udapl_endpoint_conn_t; - -/* - * Encapsulate data that describes sendrecv credit information. - */ -struct mca_btl_udapl_sr_credit_t { - mca_btl_udapl_control_header_t control; - uint32_t credits; - int connection; /* 0 == BTL_UDAPL_EAGER_CONNECTION; - 1 == BTL_UDAPL_MAX_CONNECTION */ -}; -typedef struct mca_btl_udapl_sr_credit_t mca_btl_udapl_sr_credit_t; - -/** - * An abstraction that represents a connection to a endpoint process. - * An instance of mca_btl_base_endpoint_t is associated w/ each process - * and BTL pair at startup. 
However, connections to the endpoint - * are established dynamically on an as-needed basis: -*/ - -struct mca_btl_base_endpoint_t { - opal_list_item_t super; - - struct mca_btl_udapl_module_t* endpoint_btl; - /**< BTL instance that created this connection */ - - struct mca_btl_udapl_proc_t* endpoint_proc; - /**< proc structure corresponding to endpoint */ - - mca_btl_udapl_endpoint_state_t endpoint_state; - /**< current state of the endpoint connection */ - - opal_list_t endpoint_eager_frags; - opal_list_t endpoint_max_frags; - /**< pending send frags on this endpoint */ - - int32_t endpoint_eager_sends; - int32_t endpoint_max_sends; - /**< number of sends that may be posted */ - - int32_t endpoint_sr_tokens[BTL_UDAPL_NUM_CONNECTION]; - /**< number of sends that may be posted */ - - int32_t endpoint_sr_credits[BTL_UDAPL_NUM_CONNECTION]; - /**< number of recvs that are now available */ - - int32_t endpoint_lwqe_tokens[BTL_UDAPL_NUM_CONNECTION]; - /**< number of local work queue credits available (combination of - posted sends and rdma writes allowed per endpoint */ - - int32_t endpoint_connection_seq; - /**< sequence number of sendrecv message for the connection est */ - - int32_t endpoint_connections_completed; - /**< count of completed connections for priv data connection est. 
*/ - - opal_mutex_t endpoint_lock; - /**< lock for concurrent access to endpoint state */ - - mca_btl_udapl_addr_t endpoint_addr; - /**< remote address on the other side of this endpoint */ - - DAT_EP_HANDLE endpoint_eager; - DAT_EP_HANDLE endpoint_max; - /**< uDAPL endpoint handle */ - - int32_t endpoint_eager_rdma_index; - /**< index into array of endpoints with RDMA buffers */ - mca_btl_udapl_eager_rdma_local_t endpoint_eager_rdma_local; - /**< info about local RDMA buffer */ - mca_btl_udapl_eager_rdma_remote_t endpoint_eager_rdma_remote; - /**< info about remote RDMA buffer */ -}; - -typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t; -typedef mca_btl_base_endpoint_t mca_btl_udapl_endpoint_t; - -OBJ_CLASS_DECLARATION(mca_btl_udapl_endpoint_t); - - -/* - * Start sending data on an endpoint. - */ - -int mca_btl_udapl_endpoint_send(mca_btl_base_endpoint_t* endpoint, - mca_btl_udapl_frag_t* frag); - -/* - * Set up OOB recv callback. - */ - -void mca_btl_udapl_endpoint_post_oob_recv(void); - -/* - * Finish establishing a connection - */ - -int mca_btl_udapl_endpoint_finish_connect(struct mca_btl_udapl_module_t* btl, - mca_btl_udapl_addr_t* addr, - int32_t* seq, - DAT_EP_HANDLE endpoint); - -/* - * Send number of eager rdma credits - */ -int mca_btl_udapl_endpoint_send_eager_rdma_credits(mca_btl_base_endpoint_t* endpoint); - -/* - * Establish uDAPL endpoint parameters - */ -int mca_btl_udapl_endpoint_get_params(struct mca_btl_udapl_module_t* btl, - DAT_EP_PARAM* ep_param); - -/* - * Create uDAPL endpoint - */ -int mca_btl_udapl_endpoint_create(struct mca_btl_udapl_module_t* btl, - DAT_EP_HANDLE* udapl_endpoint); - - /* - * Send number of send recv credits - */ -int mca_btl_udapl_endpoint_send_sr_credits(mca_btl_base_endpoint_t* endpoint, - const int connection); - -/* - * Handle the established DAT endpoint when private data is in use - */ -int mca_btl_udapl_endpoint_pd_established_conn( - struct mca_btl_udapl_module_t* btl, - DAT_EP_HANDLE 
established_ep); - -/* - * Utility routine. Search list of endpoints to find one that matches - * the given address. - */ -mca_btl_udapl_endpoint_t* mca_btl_udapl_find_endpoint_address_match( - struct mca_btl_udapl_module_t* btl, - mca_btl_udapl_addr_t addr); - -END_C_DECLS -#endif diff --git a/ompi/mca/btl/udapl/btl_udapl_frag.c b/ompi/mca/btl/udapl/btl_udapl_frag.c deleted file mode 100644 index 7ad6f6372c..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_frag.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Sandia National Laboratories. All rights - * reserved. - * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "btl_udapl.h" -#include "btl_udapl_frag.h" - - -static void mca_btl_udapl_frag_common_constructor(mca_btl_udapl_frag_t* frag) -{ - mca_btl_udapl_reg_t* reg = - (mca_btl_udapl_reg_t*)frag->base.super.registration; - -#if OPAL_ENABLE_DEBUG - frag->base.des_src = NULL; - frag->base.des_src_cnt = 0; - frag->base.des_dst = NULL; - frag->base.des_dst_cnt = 0; - frag->base.des_flags = 0; -#endif - - frag->registration = reg; - frag->segment.base.seg_addr.pval = (unsigned char*)frag->base.super.ptr; - frag->ftr = NULL; - - /* Don't understand why yet, but there are cases where reg is NULL - - that is, this memory has not been registered. So be careful not - to dereference a NULL pointer. 
*/ - if(NULL != reg) { - /* Save the LMR context so we can set up LMR subset triplets later */ - frag->triplet.lmr_context = reg->lmr_triplet.lmr_context; - } -} - -static void mca_btl_udapl_frag_eager_constructor(mca_btl_udapl_frag_t* frag) -{ - frag->segment.base.seg_len = mca_btl_udapl_module.super.btl_eager_limit; - frag->size = mca_btl_udapl_component.udapl_eager_frag_size; - mca_btl_udapl_frag_common_constructor(frag); -} - -static void mca_btl_udapl_frag_max_constructor(mca_btl_udapl_frag_t* frag) -{ - frag->segment.base.seg_len = mca_btl_udapl_module.super.btl_max_send_size; - frag->size = mca_btl_udapl_component.udapl_max_frag_size; - mca_btl_udapl_frag_common_constructor(frag); -} - -static void mca_btl_udapl_frag_user_constructor(mca_btl_udapl_frag_t* frag) -{ - mca_btl_udapl_frag_common_constructor(frag); - frag->segment.base.seg_len = 0; - frag->segment.base.seg_addr.pval = NULL; - frag->ftr = NULL; - frag->size = 0; - frag->registration = NULL; -} - -static void mca_btl_udapl_frag_eager_rdma_constructor(mca_btl_udapl_frag_t* frag) -{ - mca_btl_udapl_frag_eager_constructor(frag); - frag->segment.base.seg_len = mca_btl_udapl_module.super.btl_eager_limit; - frag->size = mca_btl_udapl_component.udapl_eager_frag_size; - frag->rdma_ftr = (mca_btl_udapl_rdma_footer_t *) - ((char *)(frag->segment.base.seg_addr.pval) + - frag->size - - sizeof(mca_btl_udapl_rdma_footer_t)); - frag->rdma_ftr->active=0; -} - -static void mca_btl_udapl_frag_common_destructor(mca_btl_udapl_frag_t* frag) -{ -#if OPAL_ENABLE_DEBUG - frag->ftr = NULL; - frag->size = 0; - frag->registration = NULL; - frag->segment.base.seg_len = 0; - frag->segment.base.seg_addr.pval = NULL; - - frag->base.des_src = NULL; - frag->base.des_src_cnt = 0; - frag->base.des_dst = NULL; - frag->base.des_dst_cnt = 0; - frag->base.des_flags = 0; -#endif -} - - -OBJ_CLASS_INSTANCE( - mca_btl_udapl_frag_t, - mca_btl_base_descriptor_t, - NULL, - NULL); - -OBJ_CLASS_INSTANCE( - mca_btl_udapl_frag_eager_t, - 
mca_btl_base_descriptor_t, - mca_btl_udapl_frag_eager_constructor, - mca_btl_udapl_frag_common_destructor); - -OBJ_CLASS_INSTANCE( - mca_btl_udapl_frag_max_t, - mca_btl_base_descriptor_t, - mca_btl_udapl_frag_max_constructor, - mca_btl_udapl_frag_common_destructor); - -OBJ_CLASS_INSTANCE( - mca_btl_udapl_frag_user_t, - mca_btl_base_descriptor_t, - mca_btl_udapl_frag_user_constructor, - NULL); - -OBJ_CLASS_INSTANCE( - mca_btl_udapl_frag_eager_rdma_t, - mca_btl_base_descriptor_t, - mca_btl_udapl_frag_eager_rdma_constructor, - mca_btl_udapl_frag_common_destructor); diff --git a/ompi/mca/btl/udapl/btl_udapl_frag.h b/ompi/mca/btl/udapl/btl_udapl_frag.h deleted file mode 100644 index ff8c3726a1..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_frag.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Sandia National Laboratories. All rights - * reserved. - * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_UDAPL_FRAG_H -#define MCA_BTL_UDAPL_FRAG_H - - -#define MCA_BTL_UDAPL_FRAG_ALIGN (8) -#include "ompi_config.h" - -BEGIN_C_DECLS - -typedef enum { - MCA_BTL_UDAPL_SEND, - MCA_BTL_UDAPL_RECV, - MCA_BTL_UDAPL_PUT, - MCA_BTL_UDAPL_GET, - MCA_BTL_UDAPL_CONN_RECV, - MCA_BTL_UDAPL_CONN_SEND, - MCA_BTL_UDAPL_RDMA_WRITE, - MCA_BTL_UDAPL_FRAG_EAGER_RDMA, - MCA_BTL_UDAPL_IGNORE -} mca_btl_udapl_frag_type_t; - -typedef enum { - MCA_BTL_UDAPL_CONTROL_NOOP, - MCA_BTL_UDAPL_CONTROL_RDMA_CONNECT, - MCA_BTL_UDAPL_CONTROL_RDMA_CREDIT, - MCA_BTL_UDAPL_CONTROL_SR_CREDIT -} mca_btl_udapl_control_t; - -/* Control message header */ -struct mca_btl_udapl_control_header_t { - mca_btl_udapl_control_t type; -}; -typedef struct mca_btl_udapl_control_header_t mca_btl_udapl_control_header_t; - -/** - * uDAPL btl footer. - * This is put after the payload packet so the PML header can be aligned. - * Must be aligned on MCA_BTL_UDAPL_FRAG_ALIGN byte boundary. - */ -struct mca_btl_udapl_footer_t { - mca_btl_base_tag_t tag; -}; -typedef struct mca_btl_udapl_footer_t mca_btl_udapl_footer_t; - -/** - * uDAPL BTL rdma footer. - * This is used in addtion to the uDAPL BTL footer. The two are seperate to - * allow for any padding that may be required between the two. - */ -struct mca_btl_udapl_rdma_footer_t { - uint32_t size; - volatile uint8_t active;/* 0 = not in use; 1 = data is available to be - * received; this should always be the last entry - * in this structure - */ - char pad[3]; /* pad out be aligned on MCA_BTL_UDAPL_FRAG_ALIGN byte boundary */ -}; -typedef struct mca_btl_udapl_rdma_footer_t mca_btl_udapl_rdma_footer_t; - -struct mca_btl_udapl_segment_t { - mca_btl_base_segment_t base; - DAT_RMR_CONTEXT context; -}; -typedef struct mca_btl_udapl_segment_t mca_btl_udapl_segment_t; - -/** - * uDAPL fragment derived type. 
- */ -struct mca_btl_udapl_frag_t { - mca_btl_base_descriptor_t base; - mca_btl_udapl_segment_t segment; - - struct mca_btl_udapl_module_t* btl; - struct mca_btl_base_endpoint_t* endpoint; - DAT_LMR_TRIPLET triplet; - struct mca_btl_udapl_reg_t* registration; - - mca_btl_udapl_footer_t* ftr; - mca_btl_udapl_rdma_footer_t* rdma_ftr; - size_t size; - mca_btl_udapl_frag_type_t type; - uint32_t pad; /* Padding the structure to be evenly divisble by MCA_BTL_UDAPL_FRAG_ALIGN */ -}; -typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_t; -OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_t); - - -typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_eager_t; -OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_eager_t); - -typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_max_t; -OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_max_t); - -typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_user_t; -OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_user_t); - -typedef struct mca_btl_udapl_frag_t mca_btl_udapl_frag_eager_rdma_t; -OBJ_CLASS_DECLARATION(mca_btl_udapl_frag_eager_rdma_t); - - -/* - * Macros to allocate/return descriptors from module specific - * free list(s). 
- */ - -#define MCA_BTL_UDAPL_FRAG_ALLOC_EAGER(btl, frag) \ -{ \ - ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_eager, item); \ - frag = (mca_btl_udapl_frag_t*) item; \ -} - -#define MCA_BTL_UDAPL_FRAG_RETURN_EAGER(btl, frag) \ -{ \ - OMPI_FREE_LIST_RETURN_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_eager, \ - (ompi_free_list_item_t*)(frag)); \ -} - -#define MCA_BTL_UDAPL_FRAG_ALLOC_EAGER_RECV(btl, frag) \ -{ \ - ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_eager_recv, item); \ - frag = (mca_btl_udapl_frag_t*) item; \ -} - -#define MCA_BTL_UDAPL_FRAG_ALLOC_MAX(btl, frag) \ -{ \ - ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_max, item); \ - frag = (mca_btl_udapl_frag_t*) item; \ -} - -#define MCA_BTL_UDAPL_FRAG_RETURN_MAX(btl, frag) \ -{ \ - OMPI_FREE_LIST_RETURN_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_max, \ - (ompi_free_list_item_t*)(frag)); \ -} - -#define MCA_BTL_UDAPL_FRAG_ALLOC_MAX_RECV(btl, frag) \ -{ \ - ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_max_recv, item); \ - frag = (mca_btl_udapl_frag_t*) item; \ -} - -#define MCA_BTL_UDAPL_FRAG_ALLOC_USER(btl, frag) \ -{ \ - ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_user, item); \ - frag = (mca_btl_udapl_frag_t*) item; \ -} - -#define MCA_BTL_UDAPL_FRAG_RETURN_USER(btl, frag) \ -{ \ - OMPI_FREE_LIST_RETURN_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_user, \ - (ompi_free_list_item_t*)(frag)); \ -} - -#define MCA_BTL_UDAPL_FRAG_ALLOC_CONTROL(btl, frag) \ -{ \ - ompi_free_list_item_t *item; \ - OMPI_FREE_LIST_GET_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_control, item); \ - frag = (mca_btl_udapl_frag_t*) item; \ -} - -#define MCA_BTL_UDAPL_FRAG_RETURN_CONTROL(btl, frag) \ -{ \ - 
OMPI_FREE_LIST_RETURN_MT(&((mca_btl_udapl_module_t*)btl)->udapl_frag_control, \ - (ompi_free_list_item_t*)(frag)); \ -} - -/* - * Calculate the pad value P required to align the given size S - */ -#define MCA_BTL_UDAPL_FRAG_CALC_ALIGNMENT_PAD(P,S) do { \ - (P) = ((S) % MCA_BTL_UDAPL_FRAG_ALIGN) == 0 ? \ - 0 : (MCA_BTL_UDAPL_FRAG_ALIGN - ((S) % MCA_BTL_UDAPL_FRAG_ALIGN)); \ -} while (0); - -END_C_DECLS -#endif diff --git a/ompi/mca/btl/udapl/btl_udapl_mca.c b/ompi/mca/btl/udapl/btl_udapl_mca.c deleted file mode 100644 index 9bc694cdc0..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_mca.c +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/mca/base/base.h" -#include "btl_udapl.h" -#include "btl_udapl_mca.h" -#include - -/* - * Utility routine for string parameter registration. 
- * - * @param param_name (IN) MCA parameter name - * @param param_desc (IN) MCA parameter description - * @param default_value (IN) MCA parameter default value - * @param out_value (OUT) value of MCA parameter; either default, - * or value as determined from typical - * MCA parameter setting methods - * @param flags (IN) MCA parameter boundary flag - * @return OMPI_SUCCESS or OMPI_ERR_BAD_PARAM - */ -static inline int mca_btl_udapl_reg_string(const char* param_name, - const char* param_desc, - const char* default_value, - char **storage, int flags) -{ - *storage = default_value; - (void) mca_base_component_var_register(&mca_btl_udapl_component.super.btl_version, param_name, - param_desc, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - - if (NULL == *storage && !((flags & REGSTR_EMPTY_OK) == REGSTR_EMPTY_OK)) { - BTL_ERROR(("ERROR: MCA Parameter %s : Value (NULL) out of range : " - "Default value (%s)\n \t Parameter Description : %s", - param_name, default_value, param_desc)); - return OMPI_ERR_BAD_PARAM; - } - - if ((flags & REGSTR_EMPTY_NOT_OK) && 0 == strlen(*storage)) { - BTL_ERROR(("ERROR: MCA Parameter %s : Value (%s) out of range : " - "Default value (%s)\n \t Parameter Description : %s", - param_name, *storage, default_value, param_desc)); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - - -/* - * Utility routine for integer parameter registration. 
- * - * @param param_name (IN) MCA parameter name - * @param param_desc (IN) MCA parameter description - * @param default_value (IN) MCA parameter default value - * @param out_value (OUT) value of MCA parameter; either default, - * or value as determined from typical - * MCA parameter setting methods - * @param flags (IN) MCA parameter boundary flag - * @return OMPI_SUCCESS or OMPI_ERR_BAD_PARAM - */ -static inline int mca_btl_udapl_reg_int(const char* param_name, - const char* param_desc, - int default_value, int *storage, - int flags) -{ - *storage = default_value; - (void) mca_base_component_var_register(&mca_btl_udapl_component.super.btl_version, param_name, - param_desc, MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); - - if ((flags & REGINT_NEG_ONE_OK) && -1 == *storage) { - return OMPI_SUCCESS; - } - - if (((flags & REGINT_GE_ZERO) && *storage < 0) || - ((flags & REGINT_GE_ONE) && *storage < 1) || - ((flags & REGINT_NONZERO) && 0 == *storage)) { - BTL_ERROR(("ERROR: MCA Parameter %s : Value (%d) out of range : " - "Default value (%d)\n \t Parameter Description : %s\n", - param_name, *storage, default_value, param_desc)); - return OMPI_ERR_BAD_PARAM; - } - - return OMPI_SUCCESS; -} - - -/* - * Register and check all MCA parameters - * - * @return OMPI_SUCCESS or OMPI_ERR_BAD_PARAM - */ -int mca_btl_udapl_register_mca_params(void) -{ - int rc, tmp_rc; - - rc = OMPI_SUCCESS; - - /* register uDAPL component parameters */ - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_num", - "Initial size of free lists (must be >= 1).", - 8, - &mca_btl_udapl_component.udapl_free_list_num, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_max", - "Maximum size of free lists " - "(-1 = infinite, otherwise must be >= 1).", - -1, - &mca_btl_udapl_component.udapl_free_list_max, - REGINT_NEG_ONE_OK | REGINT_GE_ONE), tmp_rc, rc); - - 
CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("free_list_inc", - "Increment size of free lists (must be >= 1).", - 8, - &mca_btl_udapl_component.udapl_free_list_inc, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("mpool", - "Name of the memory pool to be used.", - "grdma", - &mca_btl_udapl_component.udapl_mpool_name, - REGSTR_EMPTY_NOT_OK), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_modules", - "Maximum number of supported HCAs.", - 8, - &mca_btl_udapl_component.udapl_max_btls, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("num_recvs", - "Total number of receive buffers to keep posted " - "per endpoint (must be >= 1).", - 8, - &mca_btl_udapl_component.udapl_num_recvs, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("num_sends", - "Maximum number of sends to post on an endpoint " - "(must be >= 1).", - 7, - &mca_btl_udapl_component.udapl_num_sends, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("sr_win", - "Window size at which point an explicit " - "credit message will be generated (must be >= 1).", - 4, - &mca_btl_udapl_component.udapl_sr_win, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("use_eager_rdma", - "Use of RDMA for small messages : " - "1 = default, use RDMA for small messages; " - "0 = do not use RDMA for small messages. 
", - 1, - &mca_btl_udapl_component.udapl_use_eager_rdma, - REGINT_GE_ZERO), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("eager_rdma_num", - "Number of RDMA buffers to allocate " - "for small messages (must be >= 1).", - 32, - &mca_btl_udapl_component.udapl_eager_rdma_num, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_eager_rdma_peers", - "Maximum number of peers allowed to use " - "RDMA for short messages (independently RDMA will " - "still be used for large messages, (must be >= 0; " - "if zero then RDMA will not be used for short messages).", - 16, - &mca_btl_udapl_component.udapl_max_eager_rdma_peers, - REGINT_GE_ZERO), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("eager_rdma_win", - "Window size at which point an explicit " - "credit message will be generated (must be >= 1).", - 28, - &mca_btl_udapl_component.udapl_eager_rdma_win, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("timeout", - "Connection timeout, in microseconds.", - MCA_BTL_UDAPL_CONN_TIMEOUT_DEFAULT, - &mca_btl_udapl_component.udapl_timeout, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("conn_priv_data", - "Use connect private data to establish connections " - "(not supported by all uDAPL implementations).", - 0, - &mca_btl_udapl_component.udapl_conn_priv_data, - REGINT_GE_ZERO), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_events", - "The asynchronous event queue will only be " - "checked after entering progress this number of times.", - 100000000, - &mca_btl_udapl_component.udapl_async_events, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("buffer_alignment", - "Preferred communication buffer alignment, " - "in bytes (must be >= 1).", - DAT_OPTIMAL_ALIGNMENT, - &mca_btl_udapl_component.udapl_buffer_alignment, - 
REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("if_include", - "Comma-delimited list of interfaces to be included " - "(e.g. \"ibd0,ibd1 or OpenIB-cma,OpenIB-cma-1\"; empty value means " - "to use all interfaces found). Mutually exclusive with " - "btl_udapl_if_exclude.", - NULL, &mca_btl_udapl_component.if_include, - REGSTR_EMPTY_OK), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_string("if_exclude", - "Comma-delimited list of interfaces to be excluded from use " - "(e.g. \"ibd0,ibd1 or OpenIB-cma,OpenIB-cma-1\"; empty value means " - "not to exclude any). Mutually exclusive with btl_udapl_if_include.", - NULL, &mca_btl_udapl_component.if_exclude, - REGSTR_EMPTY_OK), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("verbose", - "Verbosity level of the uDAPL BTL (-1 thru 100)", - VERBOSE_SHOW_HELP, - &(mca_btl_udapl_component.udapl_verbosity), - REGINT_NEG_ONE_OK), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("compare_subnet", - "By default uDAPL BTL will compare subnets using netmask to " - "determine if an interface is reachable. 
Setting this parameter to " - "0 will essentially turn this comparison off and the uDAPL BTL will " - "assume all uDAPL interfaces are reachable (0 or 1, default==1).", - 1, - &(mca_btl_udapl_component.udapl_compare_subnet), - REGINT_GE_ZERO), tmp_rc, rc); - - /* register uDAPL module parameters */ - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("async_evd_qlen", - "The asynchronous event dispatcher queue length.", - MCA_BTL_UDAPL_ASYNC_EVD_QLEN_DEFAULT, - (int*)&mca_btl_udapl_module.udapl_async_evd_qlen, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("conn_evd_qlen", - "The connection event dispatcher queue length is " - "a function of the number of connections expected.", - MCA_BTL_UDAPL_CONN_EVD_QLEN_DEFAULT, - (int*)&mca_btl_udapl_module.udapl_conn_evd_qlen, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("dto_evd_qlen", - "The data transfer operation event dispatcher queue length is " - "a function of the number of connections as well as the " - "maximum number of outstanding data transfer operations.", - MCA_BTL_UDAPL_DTO_EVD_QLEN_DEFAULT, - (int*)&mca_btl_udapl_module.udapl_dto_evd_qlen, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_request_dtos", - "Maximum number of outstanding " - "submitted sends and rdma operations per endpoint, (see Section " - "6.6.6 of uDAPL Spec.).", - MCA_BTL_UDAPL_MAX_REQUEST_DTOS_DEFAULT, - (int*)&mca_btl_udapl_module.udapl_max_request_dtos, - REGINT_GE_ONE), tmp_rc, rc); - - CHECK_PARAM_REGISTER_RETURN_VALUE(mca_btl_udapl_reg_int("max_recv_dtos", - "Maximum number of outstanding " - "submitted receive operations per endpoint, (see Section " - "6.6.6 of uDAPL Spec.).", - MCA_BTL_UDAPL_MAX_RECV_DTOS_DEFAULT, - (int*)&mca_btl_udapl_module.udapl_max_recv_dtos, - REGINT_GE_ONE), tmp_rc, rc); - - mca_btl_udapl_module.super.btl_exclusivity = - MCA_BTL_EXCLUSIVITY_DEFAULT - 10; - 
mca_btl_udapl_module.super.btl_eager_limit = 8*1024; - mca_btl_udapl_module.super.btl_rndv_eager_limit = 8*1024; - mca_btl_udapl_module.super.btl_max_send_size = 64*1024; - mca_btl_udapl_module.super.btl_rdma_pipeline_send_length = 512*1024; - mca_btl_udapl_module.super.btl_rdma_pipeline_frag_size = 128 * 1024; - mca_btl_udapl_module.super.btl_min_rdma_pipeline_size = 0; - mca_btl_udapl_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND; - mca_btl_udapl_module.super.btl_bandwidth = 225; - mca_btl_udapl_module.super.btl_latency = 0; - - mca_btl_base_param_register(&mca_btl_udapl_component.super.btl_version, - &mca_btl_udapl_module.super); - - return rc; -} diff --git a/ompi/mca/btl/udapl/btl_udapl_mca.h b/ompi/mca/btl/udapl/btl_udapl_mca.h deleted file mode 100644 index d978a563fa..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_mca.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_UDAPL_MCA_H -#define MCA_BTL_UDAPL_MCA_H - -BEGIN_C_DECLS - -/* Define Integer Boundaries */ -#define REGINT_NEG_ONE_OK 0x01 /* value = -1 is valid */ -#define REGINT_GE_ZERO 0x02 /* value >= 0 is valid */ -#define REGINT_GE_ONE 0x04 /* value > 1 is valid */ -#define REGINT_NONZERO 0x08 /* value != 0 is valid */ - -/* Define String Boundaries */ -#define REGSTR_EMPTY_OK 0x01 /* empty string is valid */ -#define REGSTR_EMPTY_NOT_OK 0x02 /* empty string is not valid */ - -/* Define default parameter values that need to be known beyond the - * initial setting; for example, if a parameter is tuned dynamically - * by the BTL it would not be advisable to do so if the user has - * modified the default. 
- */ -#define MCA_BTL_UDAPL_ASYNC_EVD_QLEN_DEFAULT 256 -#define MCA_BTL_UDAPL_CONN_EVD_QLEN_DEFAULT 256 -#define MCA_BTL_UDAPL_DTO_EVD_QLEN_DEFAULT 256 -#define MCA_BTL_UDAPL_CONN_TIMEOUT_DEFAULT 10000000 -#define MCA_BTL_UDAPL_CONN_TIMEOUT_INC 200000 /* connection timeout - * is in microseconds; - * this incremental - * value is equivalent - * to .2 seconds - */ -#define MCA_BTL_UDAPL_CONN_TIMEOUT_MAX 2147483647 -#define MCA_BTL_UDAPL_MAX_RECV_DTOS_DEFAULT 8 -#define MCA_BTL_UDAPL_MAX_REQUEST_DTOS_DEFAULT 76 -#define MCA_BTL_UDAPL_NUM_RECVS_DEFAULT 8 - - -#define CHECK_PARAM_REGISTER_RETURN_VALUE(expr, tmp_rc, rc) \ -{ \ - tmp_rc = (expr); \ - if (OMPI_SUCCESS != tmp_rc) { \ - rc = tmp_rc; \ - } \ -} - -/** - * Function to register MCA params and check for sane values - */ - -int mca_btl_udapl_register_mca_params(void); - - -END_C_DECLS -#endif diff --git a/ompi/mca/btl/udapl/btl_udapl_proc.c b/ompi/mca/btl/udapl/btl_udapl_proc.c deleted file mode 100644 index 95cb8101d9..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_proc.c +++ /dev/null @@ -1,312 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Sandia National Laboratories. All rights - * reserved. - * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. 
All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/runtime/ompi_module_exchange.h" -#include "opal/util/net.h" -#include "btl_udapl.h" -#include "btl_udapl_endpoint.h" -#include "btl_udapl_proc.h" - -static void mca_btl_udapl_proc_construct(mca_btl_udapl_proc_t* proc); -static void mca_btl_udapl_proc_destruct(mca_btl_udapl_proc_t* proc); - -OBJ_CLASS_INSTANCE(mca_btl_udapl_proc_t, - opal_list_item_t, mca_btl_udapl_proc_construct, - mca_btl_udapl_proc_destruct); - -void mca_btl_udapl_proc_construct(mca_btl_udapl_proc_t* udapl_proc) -{ - udapl_proc->proc_ompi = 0; - udapl_proc->proc_addr_count = 0; - udapl_proc->proc_endpoints = 0; - udapl_proc->proc_endpoint_count = 0; - OBJ_CONSTRUCT(&udapl_proc->proc_lock, opal_mutex_t); - - /* add to list of all proc instance */ - OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); - opal_list_append(&mca_btl_udapl_component.udapl_procs, &udapl_proc->super); - OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); -} - - -/* - * Cleanup uDAPL proc instance - */ - -void mca_btl_udapl_proc_destruct(mca_btl_udapl_proc_t* udapl_proc) -{ - /* remove from list of all proc instances */ - OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); - opal_list_remove_item(&mca_btl_udapl_component.udapl_procs, &udapl_proc->super); - OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); - - /* release resources */ - if(NULL != udapl_proc->proc_endpoints) { - free(udapl_proc->proc_endpoints); - } - OBJ_DESTRUCT(&udapl_proc->proc_lock); -} - - -/* - * Look for an existing uDAPL process instances based on the associated - * ompi_proc_t instance. 
- */ -static mca_btl_udapl_proc_t* mca_btl_udapl_proc_lookup_ompi(ompi_proc_t* ompi_proc) -{ - mca_btl_udapl_proc_t* udapl_proc; - - OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock); - - for(udapl_proc = (mca_btl_udapl_proc_t*) - opal_list_get_first(&mca_btl_udapl_component.udapl_procs); - udapl_proc != (mca_btl_udapl_proc_t*) - opal_list_get_end(&mca_btl_udapl_component.udapl_procs); - udapl_proc = (mca_btl_udapl_proc_t*)opal_list_get_next(udapl_proc)) { - - if(udapl_proc->proc_ompi == ompi_proc) { - OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); - return udapl_proc; - } - - } - - OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock); - - return NULL; -} - -/* - * Create a uDAPL process structure. There is a one-to-one correspondence - * between a ompi_proc_t and a mca_btl_udapl_proc_t instance. We cache - * additional data (specifically the list of mca_btl_udapl_endpoint_t instances, - * and published addresses) associated w/ a given destination on this - * datastructure. - */ - -mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc) -{ - mca_btl_udapl_proc_t* udapl_proc = NULL; - size_t size; - int rc; - - /* Check if we have already created a uDAPL proc - * structure for this ompi process */ - udapl_proc = mca_btl_udapl_proc_lookup_ompi(ompi_proc); - if(udapl_proc != NULL) { - return udapl_proc; - } - - /* create a new udapl proc out of the ompi_proc ... 
*/ - udapl_proc = OBJ_NEW(mca_btl_udapl_proc_t); - udapl_proc->proc_endpoint_count = 0; - udapl_proc->proc_ompi = ompi_proc; - - /* query for the peer address info */ - rc = ompi_modex_recv( - &mca_btl_udapl_component.super.btl_version, - ompi_proc, - (void*)&udapl_proc->proc_addrs, - &size); - if(OMPI_SUCCESS != rc) { - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, - ("ompi_modex_recv failed for peer %s", - OMPI_NAME_PRINT(&ompi_proc->proc_name))); - OBJ_RELEASE(udapl_proc); - return NULL; - } - - if((size % sizeof(mca_btl_udapl_addr_t)) != 0) { - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_CRITICAL, - ("invalid udapl address for peer %s", - OMPI_NAME_PRINT(&ompi_proc->proc_name))); - OBJ_RELEASE(udapl_proc); - return NULL; - } - - udapl_proc->proc_addr_count = size/sizeof(mca_btl_udapl_addr_t); - if (0 == udapl_proc->proc_addr_count) { - udapl_proc->proc_endpoints = NULL; - } else { - udapl_proc->proc_endpoints = (mca_btl_base_endpoint_t**) - malloc(udapl_proc->proc_addr_count * sizeof(mca_btl_base_endpoint_t*)); - } - if(NULL == udapl_proc->proc_endpoints) { - OBJ_RELEASE(udapl_proc); - return NULL; - } - return udapl_proc; -} - - -/* - * Find an address on the peer_process which matches stated criteria - * to the udapl btl module address information. Return in peer_addr_idx - * the index to the peer_process address that matches the btl module - * address. Where match criteria is: - * - the address in not already in use - * - compare addresses using netmask, the netmask value can be modified with - * "--mca btl_udapl_if_mask" - * - * Note: since this is called from mca_btl_udapl_proc_insert() it - * is assumed that the process lock is locked when entered. - * - * @param udapl_btl (IN) BTL module - * @param peer_process (IN) BTL peer process - * @param peer_addr_idx(IN/OUT) Index of address on peer_process - * which matches the udapl_btl address data. - * On success should be >= 0. 
- * @return OMPI_SUCCESS or error status on failure - */ -static int mca_btl_udapl_proc_address_match( - mca_btl_udapl_module_t* udapl_btl, - mca_btl_udapl_proc_t* peer_proc, - int* peer_addr_idx) -{ - int i; - struct sockaddr *saddr; - struct sockaddr_in *btl_addr; - struct sockaddr_in *peer_addr; - char btl_addr_string[INET_ADDRSTRLEN]; - char peer_addr_string[INET_ADDRSTRLEN]; - - *peer_addr_idx = MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX; - - /* use generic address to find address family */ - saddr = (struct sockaddr *)&(udapl_btl->udapl_addr.addr); - - if (saddr->sa_family == AF_INET) { - - btl_addr = (struct sockaddr_in *)saddr; - - /* Loop thru peer process addresses looking for match. - * Match criteria: - * - address should not be "inuse" - * - both udapl btl module and peer address should be on - * the same subnet (compare with if_mask value) - */ - for(i = 0; i < (int) peer_proc->proc_addr_count; i++) { - - peer_addr = - (struct sockaddr_in *)&(peer_proc->proc_addrs[i].addr); - - if (VERBOSE_INFORM <= - mca_btl_udapl_component.udapl_verbosity) { - - /* retrieve udapl btl and peer address string for reporting */ - inet_ntop(AF_INET, (void *) &btl_addr->sin_addr, - btl_addr_string, INET_ADDRSTRLEN); - inet_ntop(AF_INET, (void *) &peer_addr->sin_addr, - peer_addr_string, INET_ADDRSTRLEN); - } - - if ((false == peer_proc->proc_addrs[i].inuse) && - (opal_net_samenetwork((struct sockaddr *)btl_addr, - (struct sockaddr *)peer_addr, udapl_btl->udapl_if_mask))) { - - /* capture index of remote address where match found */ - *peer_addr_idx = i; - - /* mark this address as now being used */ - peer_proc->proc_addrs[i].inuse = true; - - /* report what address was found to match */ - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM, - ("uDAPL BTL module(%s) matched %s", - btl_addr_string, peer_addr_string)); - break; - } else { - /* peer address already used by another udapl btl - * module or netmask check not successful so skip - */ - BTL_UDAPL_VERBOSE_OUTPUT(VERBOSE_INFORM, - 
("uDAPL BTL module(%s) either skipped because it " - "is already in use or match criteria not successful " - "for peer address %s", - btl_addr_string, peer_addr_string)); - } - } - - } else { - /* current uDAPL BTL only supports IPv4 */ - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", "IPv4 only", - true, ompi_process_info.nodename)); - return OMPI_ERROR; - } - - if (MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX == *peer_addr_idx) { - BTL_UDAPL_VERBOSE_HELP(VERBOSE_SHOW_HELP, - ("help-mpi-btl-udapl.txt", "no network match", - true, btl_addr_string, ompi_process_info.nodename, - (NULL == peer_proc->proc_ompi->proc_hostname) ? - "unknown" : peer_proc->proc_ompi->proc_hostname)); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - return OMPI_SUCCESS; -} - - -/* - * Note that this routine must be called with the lock on the process - * already held. Insert a btl instance into the proc array and assign - * it an address. - */ -int mca_btl_udapl_proc_insert( - mca_btl_udapl_proc_t* udapl_proc, - mca_btl_base_endpoint_t* udapl_endpoint) -{ - int peer_address_idx; - mca_btl_udapl_module_t* udapl_btl = udapl_endpoint->endpoint_btl; - - /* Check so as not to create more endpoints than addresses. - * Example: If one node has 3 btl modules and another only has 2, - * this check prevents the node with 3 btl modules from - * overloading the other, i.e. only 2 possible connections will - * be possible. 
- */ - if (udapl_proc->proc_endpoint_count > udapl_proc->proc_addr_count) - return OMPI_ERR_OUT_OF_RESOURCE; - - /* Find an endpoint on the udapl process of interest that matches - * the endpoint information of the current udapl btl module - */ - if (OMPI_SUCCESS != - mca_btl_udapl_proc_address_match(udapl_btl, udapl_proc, - &peer_address_idx)) { - /* no address on peer proc met criteria */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* insert into endpoint array */ - udapl_endpoint->endpoint_proc = udapl_proc; - udapl_endpoint->endpoint_addr = - udapl_proc->proc_addrs[peer_address_idx]; - - udapl_proc->proc_endpoints[udapl_proc->proc_endpoint_count] = udapl_endpoint; - udapl_proc->proc_endpoint_count++; - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/btl/udapl/btl_udapl_proc.h b/ompi/mca/btl/udapl/btl_udapl_proc.h deleted file mode 100644 index da301e4168..0000000000 --- a/ompi/mca/btl/udapl/btl_udapl_proc.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_BTL_UDAPL_PROC_H -#define MCA_BTL_UDAPL_PROC_H - -#include "opal/class/opal_object.h" -#include "ompi/proc/proc.h" -#include "btl_udapl.h" -#include "btl_udapl_endpoint.h" - -BEGIN_C_DECLS - -/** - * Represents the state of a remote process and the set of addresses - * that it exports. 
Also cache an instance of mca_btl_base_endpoint_t for - * each - * BTL instance that attempts to open a connection to the process. - */ -struct mca_btl_udapl_proc_t { - opal_list_item_t super; - /**< allow proc to be placed on a list */ - - ompi_proc_t *proc_ompi; - /**< pointer to corresponding ompi_proc_t */ - - struct mca_btl_udapl_addr_t* proc_addrs; - /**< array of addresses exported by peer */ - - size_t proc_addr_count; - /**< number of addresses published by peer */ - - struct mca_btl_base_endpoint_t **proc_endpoints; - /**< array of endpoints that have been created to access this proc */ - - size_t proc_endpoint_count; - /**< number of endpoints */ - - opal_mutex_t proc_lock; - /**< lock to protect against concurrent access to proc state */ -}; -typedef struct mca_btl_udapl_proc_t mca_btl_udapl_proc_t; -OBJ_CLASS_DECLARATION(mca_btl_udapl_proc_t); - -#define MCA_BTL_UDAPL_INVALID_PEER_ADDR_IDX -1 - -mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc); -int mca_btl_udapl_proc_insert(mca_btl_udapl_proc_t*, mca_btl_base_endpoint_t*); - -END_C_DECLS -#endif diff --git a/ompi/mca/btl/udapl/configure.m4 b/ompi/mca/btl/udapl/configure.m4 deleted file mode 100644 index c712ed59f6..0000000000 --- a/ompi/mca/btl/udapl/configure.m4 +++ /dev/null @@ -1,62 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -# MCA_btl_udapl_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_ompi_btl_udapl_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/btl/udapl/Makefile]) - - OMPI_CHECK_UDAPL([btl_udapl], - [btl_udapl_happy="yes"], - [btl_udapl_happy="no"]) - - AS_IF([test "$btl_udapl_happy" = "yes"], - [$1], - [$2]) - - # Borrowed from MVAPI BTL - a data structure in the uDAPL headers - # is not fully ISO C. Remove -pedantic to silence a warning. - btl_udapl_CFLAGS="`echo $CFLAGS | sed 's/-pedantic//g'`" - AS_IF([test "$btl_udapl_CFLAGS" != "$CFLAGS" -a "$btl_udapl_happy" = "yes"], - [AC_MSG_WARN([Removed -pedantic from CFLAGS for -uDAPL component because the uDAPL headers are not fully ISO C])]) - - # Test for uDAPL relaxed ordered specific symbols - AS_IF([test "$btl_udapl_happy" = "yes"], - [AC_MSG_CHECKING(for uDAPL DAT_MEM_TYPE_SO_VIRTUAL) - AC_TRY_COMPILE([#include ], - [DAT_MEM_TYPE dmt = DAT_MEM_TYPE_SO_VIRTUAL;], - [AC_MSG_RESULT(yes) - btl_udapl_ro_aware=1], - [AC_MSG_RESULT(no) - btl_udapl_ro_aware=0]) - AC_DEFINE_UNQUOTED([HAVE_DAT_MEM_TYPE_SO_VIRTUAL], - [$btl_udapl_ro_aware], - [uDAPL DAT_MEM_TYPE_SO_VIRTUAL check])]) - - # substitute in the things needed to build udapl - AC_SUBST([btl_udapl_CFLAGS]) - AC_SUBST([btl_udapl_CPPFLAGS]) - AC_SUBST([btl_udapl_LDFLAGS]) - AC_SUBST([btl_udapl_LIBS]) -])dnl diff --git a/ompi/mca/btl/udapl/help-mpi-btl-udapl.txt b/ompi/mca/btl/udapl/help-mpi-btl-udapl.txt deleted file mode 100644 index 13b5876aa3..0000000000 --- a/ompi/mca/btl/udapl/help-mpi-btl-udapl.txt +++ /dev/null @@ -1,139 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open MPI. -# -[invalid num rdma segments] -WARNING: MCA parameter [btl_udapl_eager_rdma_num = %d] is not valid. -RDMA will not be used for short messages. Try setting to positive -value, e.g. 16. -# -[use default endpoint params] -WARNING: Using default uDAPL endpoint parameters not those that -would have been modified by MCA parameters. -# -[optimal buffer alignment mismatch] -WARNING: DAT_OPTIMAL_ALIGNMENT = %d : BTL buffer_alignment = %d. -The BTL buffer_alignment value may not be optimal. If all nodes -report the same DAT_OPTIMAL_ALIGNMENT value and this differs from -BTL buffer_alignment then setting "--mca btl_udapl_buffer_alignment -%d" may improve performance. -# -[max_recv_dtos too low] -WARNING: The MCA parameter max_recv_dtos has been modified to a value, -%d, that is insufficient. This value must be greater than or equal to -num_recvs, %d. The uDAPL BTL will adjust to allow the program to -proceed. -# -[max_request_dtos too low] -WARNING: The MCA parameter max_request_dtos has been modified to a -value, %d, which may not be sufficient. Try setting max_request_dtos -to %d if program fails. -# -[max_recv_dtos system max] -WARNING: The MCA parameter max_recv_dtos is trying to be set to, -%d, which is larger than allowable so the value will be set to maximum -allowed, %d. -# -[max_request_dtos system max] -WARNING: The MCA parameter max_request_dtos is trying to be set to, -%d, which is larger than allowable so the value will be set to maximum -allowed, %d. 
-# -[evd_qlen adapter max] -WARNING: The MCA parameter %s is trying to be set to %d, -which is larger than allowable so the value will be set to maximum -allowed, %d. -# -[evd_qlen too low] -WARNING: The MCA parameter %s has been modified to a value, -%d, which may not be sufficient. Try setting %s to %d if -program fails. -# -[connection timeout low] -WARNING: The MCA parameter %s has been modified to a value, -%d, which may not be sufficient. Try setting %s to %d if -program fails. -# -[dat_lmr_create DAT_INSUFFICIENT_RESOURCES] -WARNING: The uDAPL BTL is not able to register memory. Possibly out of -allowed privileged memory (i.e. memory that can be pinned). Increasing -the allowed privileged memory may alleviate this issue. -# -[dat_ia_open fail] -WARNING: Failed to open "%s" [%s:%s]. -This may be a real error or it may be an invalid entry in the uDAPL -Registry which is contained in the dat.conf file. Contact your local -System Administrator to confirm the availability of the interfaces in -the dat.conf file. -# -[specified include and exclude] -ERROR: You have specified both the btl_udapl_if_include and -btl_udapl_if_exclude MCA parameters. These two parameters are -mutually exclusive; you can only specify one or the other. - -For reference, the values that you specified are: - - btl_udapl_if_include: %s - btl_udapl_if_exclude: %s -# -[nonexistent entry] -WARNING: One or more nonexistent interfaces were specified: - - Host: %s - MCA parameter: btl_udapl_if_%sclude - Nonexistent entities: %s - -These entities will be ignored. -# -[IPv4 only] -WARNING: uDAPL BTL only supports IPv4 addressing at this time. -Something other than an IPv4 address was detected on %s. -# -[no network match] -WARNING: Interface %s on node %s not able to find matching -interface on peer node %s. Could be that the interfaces are on -different subnets or there are fewer available uDAPL interfaces on peer. 
-# -[interface not found] -WARNING: Host %s, not able to determine interface name for -address %s. Will attempt to continue, assuming all addresses to -peer are reachable. -# -[netmask not found] -WARNING: Host %s, not able to determine netmask for address -%s. Will attempt to continue assuming all addresses to -peer are reachable. -# -[relaxed order support] -WARNING: While attempting to open interface %s the system reported -DAT_INVALID_RO_COOKIE. This indicates the current system supports -relaxed ordering. An attempt will be made to open the interface using -the following modified interface name %s. Open MPI must not use RDMA -for short eager messages in this scenario. Therefore, if opened -successfully RDMA will not be used for short eager messages. This will -negatively impact short message latency. -# -[dat_ia_open fail RO] -WARNING: Failed to open "%s" [%s:%s]. -Attempted to call dat_ia_open() on an interface that has been prefixed -with "RO_AWARE_" after first trying to open %s and failed with -DAT_INVALID_RO_COOKIE. -#