diff --git a/ompi/mca/pml/teg/Makefile.am b/ompi/mca/pml/teg/Makefile.am deleted file mode 100644 index f4d1cddcb8..0000000000 --- a/ompi/mca/pml/teg/Makefile.am +++ /dev/null @@ -1,67 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - - -teg_sources = \ - pml_teg.c \ - pml_teg.h \ - pml_teg_cancel.c \ - pml_teg_component.c \ - pml_teg_component.h \ - pml_teg_iprobe.c \ - pml_teg_irecv.c \ - pml_teg_isend.c \ - pml_teg_ptl.c \ - pml_teg_ptl.h \ - pml_teg_proc.c \ - pml_teg_proc.h \ - pml_teg_progress.c \ - pml_teg_recvfrag.c \ - pml_teg_recvfrag.h \ - pml_teg_recvreq.c \ - pml_teg_recvreq.h \ - pml_teg_sendreq.c \ - pml_teg_sendreq.h \ - pml_teg_start.c \ - pml_ptl_array.c \ - pml_ptl_array.h - -if OMPI_BUILD_pml_teg_DSO -component_noinst = -component_install = mca_pml_teg.la -else -component_noinst = libmca_pml_teg.la -component_install = -endif - - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) -mca_pml_teg_la_SOURCES = $(teg_sources) -mca_pml_teg_la_LIBADD = \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la -mca_pml_teg_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pml_teg_la_SOURCES = $(teg_sources) -libmca_pml_teg_la_LIBADD = -libmca_pml_teg_la_LDFLAGS = -module -avoid-version - diff --git a/ompi/mca/pml/teg/configure.params b/ompi/mca/pml/teg/configure.params deleted file mode 
100644 index 5fe511dfe8..0000000000 --- a/ompi/mca/pml/teg/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=pml_teg.c -PARAM_CONFIG_HEADER_FILE="teg_config.h" -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/pml/teg/pml_ptl_array.c b/ompi/mca/pml/teg/pml_ptl_array.c deleted file mode 100644 index 6d24c18999..0000000000 --- a/ompi/mca/pml/teg/pml_ptl_array.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "ompi/mca/pml/pml.h" -#include "pml_ptl_array.h" -#include "ompi/constants.h" - -static void mca_ptl_array_construct(mca_ptl_array_t* array) -{ - array->ptl_procs = 0; - array->ptl_size = 0; - array->ptl_index = 0; - array->ptl_reserve = 0; -} - - -static void mca_ptl_array_destruct(mca_ptl_array_t* array) -{ - if(array->ptl_procs != 0) - free(array->ptl_procs); -} - -OBJ_CLASS_INSTANCE( - mca_pml_teg_ptl_array_t, - opal_object_t, - mca_ptl_array_construct, - mca_ptl_array_destruct -); - -int mca_ptl_array_reserve(mca_ptl_array_t* array, size_t size) -{ - mca_ptl_proc_t *procs; - if(array->ptl_reserve >= size) - return OMPI_SUCCESS; - - procs = (mca_ptl_proc_t *)realloc(array->ptl_procs, sizeof(mca_ptl_proc_t)*size); - if(NULL == procs) - return OMPI_ERR_OUT_OF_RESOURCE; - array->ptl_procs = procs; - array->ptl_reserve = size; - memset(array->ptl_procs+array->ptl_size, 0, (size-array->ptl_size)*sizeof(mca_ptl_proc_t)); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/teg/pml_ptl_array.h b/ompi/mca/pml/teg/pml_ptl_array.h deleted file mode 100644 index 8395a58bf1..0000000000 --- a/ompi/mca/pml/teg/pml_ptl_array.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef OMPI_PTL_ARRAY_H -#define OMPI_PTL_ARRAY_H - -#include "opal/util/output.h" -#include "ompi/mca/ptl/ptl.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -extern opal_class_t mca_pml_teg_ptl_array_t_class; - -/** - * A data structure associated with a ompi_proc_t that caches - * addressing/scheduling attributes for a specific PTL instance - * that can be used to reach the process. - */ -struct mca_ptl_proc_t { - int ptl_weight; /**< PTL weight for scheduling */ - struct mca_ptl_base_peer_t* ptl_peer; /**< PTL addressing info */ - struct mca_pml_base_ptl_t* ptl_base; /**< PML specific PTL info */ - mca_ptl_base_module_t *ptl; /**< PTL module */ -}; -typedef struct mca_ptl_proc_t mca_ptl_proc_t; - -/** - * A dynamically growable array of mca_ptl_proc_t instances. - * Maintains an index into the array that is used for round-robin - * scheduling across contents. - */ -struct mca_ptl_array_t { - opal_object_t super; - mca_ptl_proc_t* ptl_procs; /**< array of ptl procs */ - size_t ptl_size; /**< number available */ - size_t ptl_reserve; /**< size of allocated ptl_proc array */ - size_t ptl_index; /**< last used index*/ -}; -typedef struct mca_ptl_array_t mca_ptl_array_t; -typedef struct mca_ptl_array_t mca_pml_teg_ptl_array_t; - - -/** - * If required, reallocate (grow) the array to the indicate size. - * - * @param array (IN) - * @param size (IN) - */ -int mca_ptl_array_reserve(mca_ptl_array_t*, size_t); - -static inline size_t mca_ptl_array_get_size(mca_ptl_array_t* array) -{ - return array->ptl_size; -} - -/** - * Grow the array if required, and set the size. 
- * - * @param array (IN) - * @param size (IN) - */ -static inline void mca_ptl_array_set_size(mca_ptl_array_t* array, size_t size) -{ - if(array->ptl_size > array->ptl_reserve) - mca_ptl_array_reserve(array, size); - array->ptl_size = size; -} - -/** - * Grow the array size by one and return the item at that index. - * - * @param array (IN) - */ -static inline mca_ptl_proc_t* mca_ptl_array_insert(mca_ptl_array_t* array) -{ -#if OMPI_ENABLE_DEBUG - if(array->ptl_size >= array->ptl_reserve) { - opal_output(0, "mca_ptl_array_insert: invalid array index %d >= %d", - array->ptl_size, array->ptl_reserve); - return 0; - } -#endif - return &array->ptl_procs[array->ptl_size++]; -} - -/** - * Return an array item at the specified index. - * - * @param array (IN) - * @param index (IN) - */ -static inline mca_ptl_proc_t* mca_ptl_array_get_index(mca_ptl_array_t* array, size_t index) -{ -#if OMPI_ENABLE_DEBUG - if(index >= array->ptl_size) { - opal_output(0, "mca_ptl_array_get_index: invalid array index %d >= %d", - index, array->ptl_size); - return 0; - } -#endif - return &array->ptl_procs[index]; -} - -/** - * Return the next LRU index in the array. - * - * @param array (IN) - * @param index (IN) - */ -static inline mca_ptl_proc_t* mca_ptl_array_get_next(mca_ptl_array_t* array) -{ - mca_ptl_proc_t* ptl_proc; -#if OMPI_ENABLE_DEBUG - if(array->ptl_size == 0) { - opal_output(0, "mca_ptl_array_get_next: invalid array size"); - return 0; - } -#endif - ptl_proc = &array->ptl_procs[array->ptl_index++]; - if(array->ptl_index == array->ptl_size) - array->ptl_index = 0; - return ptl_proc; -} - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif - diff --git a/ompi/mca/pml/teg/pml_teg.c b/ompi/mca/pml/teg/pml_teg.c deleted file mode 100644 index 82d92eee8b..0000000000 --- a/ompi/mca/pml/teg/pml_teg.c +++ /dev/null @@ -1,454 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include - -#include "ompi/class/ompi_bitmap.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/base.h" -#include "ompi/mca/ptl/base/ptl_base_comm.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "pml_teg.h" -#include "pml_teg_component.h" -#include "pml_teg_proc.h" -#include "pml_teg_ptl.h" -#include "pml_teg_recvreq.h" -#include "pml_teg_sendreq.h" -#include "pml_teg_recvfrag.h" - - -mca_pml_teg_t mca_pml_teg = { - { - mca_pml_teg_add_procs, - mca_pml_teg_del_procs, - mca_pml_teg_enable, - mca_pml_teg_progress, - mca_pml_teg_add_comm, - mca_pml_teg_del_comm, - mca_pml_teg_irecv_init, - mca_pml_teg_irecv, - mca_pml_teg_recv, - mca_pml_teg_isend_init, - mca_pml_teg_isend, - mca_pml_teg_send, - mca_pml_teg_iprobe, - mca_pml_teg_probe, - mca_pml_teg_start, - 32768, - (0x7fffffff) /* XXX should be INT_MAX, as in ob1 */ - } -}; - - -int mca_pml_teg_add_comm(ompi_communicator_t* comm) -{ - /* allocate pml specific comm data */ - mca_pml_ptl_comm_t* pml_comm = OBJ_NEW(mca_pml_ptl_comm_t); - if (NULL == pml_comm) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - mca_pml_ptl_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count); - comm->c_pml_comm = pml_comm; - return OMPI_SUCCESS; -} - -int mca_pml_teg_del_comm(ompi_communicator_t* comm) -{ - OBJ_RELEASE(comm->c_pml_comm); - comm->c_pml_comm = NULL; /* make sure it's set to 
NULL */ - return OMPI_SUCCESS; -} - -static int ptl_exclusivity_compare(const void* arg1, const void* arg2) -{ - mca_ptl_base_module_t* ptl1 = *(struct mca_ptl_base_module_t**)arg1; - mca_ptl_base_module_t* ptl2 = *(struct mca_ptl_base_module_t**)arg2; - if( ptl1->ptl_exclusivity > ptl2->ptl_exclusivity ) { - return -1; - } else if (ptl1->ptl_exclusivity == ptl2->ptl_exclusivity ) { - return 0; - } else { - return 1; - } -} - - -static int mca_pml_teg_add_ptls(void) -{ - /* build an array of ptls and ptl modules */ - mca_ptl_base_selected_module_t* selected_ptl; - size_t num_ptls = opal_list_get_size(&mca_ptl_base_modules_initialized); - size_t cache_bytes = 0; - - mca_pml_teg.teg_num_ptl_modules = 0; - mca_pml_teg.teg_num_ptl_progress = 0; - mca_pml_teg.teg_num_ptl_components = 0; - mca_pml_teg.teg_ptl_modules = (mca_ptl_base_module_t **)malloc(sizeof(mca_ptl_base_module_t*) * num_ptls); - mca_pml_teg.teg_ptl_progress = (mca_ptl_base_component_progress_fn_t*)malloc(sizeof(mca_ptl_base_component_progress_fn_t) * num_ptls); - mca_pml_teg.teg_ptl_components = (mca_ptl_base_component_t **)malloc(sizeof(mca_ptl_base_component_t*) * num_ptls); - if (NULL == mca_pml_teg.teg_ptl_modules || - NULL == mca_pml_teg.teg_ptl_progress || - NULL == mca_pml_teg.teg_ptl_components) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for(selected_ptl = (mca_ptl_base_selected_module_t*) - opal_list_get_first(&mca_ptl_base_modules_initialized); - selected_ptl != (mca_ptl_base_selected_module_t*) - opal_list_get_end(&mca_ptl_base_modules_initialized); - selected_ptl = (mca_ptl_base_selected_module_t*)opal_list_get_next(selected_ptl)) { - mca_ptl_base_module_t *ptl = selected_ptl->pbsm_module; - size_t i; - - mca_pml_teg.teg_ptl_modules[mca_pml_teg.teg_num_ptl_modules++] = ptl; - for(i=0; i < mca_pml_teg.teg_num_ptl_components; i++) { - if(mca_pml_teg.teg_ptl_components[i] == ptl->ptl_component) { - break; - } - } - if(i == mca_pml_teg.teg_num_ptl_components) { - 
mca_pml_teg.teg_ptl_components[mca_pml_teg.teg_num_ptl_components++] = ptl->ptl_component; - } - - /* - *setup ptl - */ - - /* set pointer to fragment matching logic routine, if this - * not already set by the ptl - */ - if(NULL == ptl->ptl_match) { - ptl->ptl_match = mca_pml_teg_recv_frag_match; - } - ptl->ptl_send_progress = mca_pml_teg_send_request_progress; - ptl->ptl_recv_progress = mca_pml_teg_recv_request_progress; - ptl->ptl_stack = ptl; - ptl->ptl_base = NULL; - - /* find maximum required size for cache */ - if(ptl->ptl_cache_bytes > cache_bytes) { - cache_bytes = ptl->ptl_cache_bytes; - } - } - - /* setup send fragments based on largest required send request */ - ompi_free_list_init( &mca_pml_teg.teg_send_requests, - sizeof(mca_pml_teg_send_request_t) + cache_bytes, - OBJ_CLASS(mca_pml_teg_send_request_t), - mca_pml_teg.teg_free_list_num, - mca_pml_teg.teg_free_list_max, - mca_pml_teg.teg_free_list_inc, - NULL ); - - /* sort ptl list by exclusivity */ - qsort(mca_pml_teg.teg_ptl_modules, mca_pml_teg.teg_num_ptl_modules, sizeof(struct mca_ptl_t*), ptl_exclusivity_compare); - return OMPI_SUCCESS; -} - -/* - * Called by the base PML in order to notify the PMLs about their selected status. After the init pass, - * the base module will choose one PML (depending on informations provided by the init function) and then - * it will call the pml_enable function with true (for the selected one) and with false for all the - * others. The selected one can then pass control information through to all PTL modules. 
- */ - -int mca_pml_teg_enable(bool enable) -{ - size_t i; - int value = enable; - - /* If I'm not selected then prepare for close */ - if( false == enable ) return OMPI_SUCCESS; - - /* recv requests */ - ompi_free_list_init( &mca_pml_teg.teg_recv_requests, - sizeof(mca_pml_teg_recv_request_t), - OBJ_CLASS(mca_pml_teg_recv_request_t), - mca_pml_teg.teg_free_list_num, - mca_pml_teg.teg_free_list_max, - mca_pml_teg.teg_free_list_inc, - NULL ); - - /* Grab all the PTLs and prepare them */ - mca_pml_teg_add_ptls(); - - /* and now notify them about the status */ - for(i=0; i < mca_pml_teg.teg_num_ptl_components; i++) { - if(NULL != mca_pml_teg.teg_ptl_components[i]->ptlm_control) { - int rc = mca_pml_teg.teg_ptl_components[i]->ptlm_control(MCA_PTL_ENABLE,&value,sizeof(value)); - if(rc != OMPI_SUCCESS) - return rc; - } - } - return OMPI_SUCCESS; -} - -/* - * For each proc setup a datastructure that indicates the PTLs - * that can be used to reach the destination. - * - */ - -int mca_pml_teg_add_procs(ompi_proc_t** procs, size_t nprocs) -{ - size_t p; - ompi_bitmap_t reachable; - struct mca_ptl_base_peer_t** ptl_peers = NULL; - int rc; - size_t p_index; - - if(nprocs == 0) - return OMPI_SUCCESS; - - OBJ_CONSTRUCT(&reachable, ompi_bitmap_t); - rc = ompi_bitmap_init(&reachable, nprocs); - if(OMPI_SUCCESS != rc) - return rc; - - /* attempt to add all procs to each ptl */ - ptl_peers = (struct mca_ptl_base_peer_t **)malloc(nprocs * sizeof(struct mca_ptl_base_peer_t*)); - for(p_index = 0; p_index < mca_pml_teg.teg_num_ptl_modules; p_index++) { - mca_ptl_base_module_t* ptl = mca_pml_teg.teg_ptl_modules[p_index]; - int ptl_inuse = 0; - - /* if the ptl can reach the destination proc it sets the - * corresponding bit (proc index) in the reachable bitmap - * and can return addressing information for each proc - * that is passed back to the ptl on data transfer calls - */ - ompi_bitmap_clear_all_bits(&reachable); - memset(ptl_peers, 0, nprocs * sizeof(struct mca_ptl_base_peer_t*)); 
- rc = ptl->ptl_add_procs(ptl, nprocs, procs, ptl_peers, &reachable); - if(OMPI_SUCCESS != rc) { - free(ptl_peers); - return rc; - } - - /* for each proc that is reachable - add the ptl to the procs array(s) */ - for(p=0; p < nprocs; p++) { - ompi_proc_t *proc; - mca_pml_teg_proc_t* proc_pml; - mca_ptl_proc_t* proc_ptl; - size_t size; - - if( !ompi_bitmap_is_set_bit(&reachable, p) ) continue; - - proc = procs[p]; - proc_pml = (mca_pml_teg_proc_t*) proc->proc_pml; - - /* this ptl can be used */ - ptl_inuse++; - - /* initialize each proc */ - if(NULL == proc_pml) { - - /* allocate pml specific proc data */ - proc_pml = OBJ_NEW(mca_pml_teg_proc_t); - if (NULL == proc_pml) { - opal_output(0, "mca_pml_teg_add_procs: unable to allocate resources"); - free(ptl_peers); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* preallocate space in array for max number of ptls */ - mca_ptl_array_reserve(&proc_pml->proc_ptl_first, mca_pml_teg.teg_num_ptl_modules); - mca_ptl_array_reserve(&proc_pml->proc_ptl_next, mca_pml_teg.teg_num_ptl_modules); - proc_pml->base.proc_ompi = proc; - proc->proc_pml = (mca_pml_proc_t*) proc_pml; - } - - /* dont allow an additional PTL with a lower exclusivity ranking */ - size = mca_ptl_array_get_size(&proc_pml->proc_ptl_next); - if(size > 0) { - proc_ptl = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, size-1); - /* skip this ptl if the exclusivity is less than the previous */ - if(proc_ptl->ptl->ptl_exclusivity > ptl->ptl_exclusivity) { - if(ptl_peers[p] != NULL) { - ptl->ptl_del_procs(ptl, 1, &proc, &ptl_peers[p]); - } - continue; - } - } - - /* cache the ptl on the proc */ - proc_ptl = mca_ptl_array_insert(&proc_pml->proc_ptl_next); - proc_ptl->ptl = ptl; - proc_ptl->ptl_peer = ptl_peers[p]; - proc_ptl->ptl_weight = 0; - proc_pml->proc_ptl_flags |= ptl->ptl_flags; - } - - if(ptl_inuse > 0 && NULL != ptl->ptl_component->ptlm_progress) { - size_t p; - bool found = false; - for(p=0; p < mca_pml_teg.teg_num_ptl_progress; p++) { - 
if(mca_pml_teg.teg_ptl_progress[p] == ptl->ptl_component->ptlm_progress) { - found = true; - break; - } - } - if(found == false) { - mca_pml_teg.teg_ptl_progress[mca_pml_teg.teg_num_ptl_progress] = - ptl->ptl_component->ptlm_progress; - mca_pml_teg.teg_num_ptl_progress++; - } - } - } - free(ptl_peers); - - /* iterate back through procs and compute metrics for registered ptls */ - for(p=0; pproc_pml; - double total_bandwidth = 0; - uint32_t latency = 0; - size_t n_index; - size_t n_size; - - /* skip over procs w/ no ptls registered */ - if(NULL == proc_pml) - continue; - - /* (1) determine the total bandwidth available across all ptls - * note that we need to do this here, as we may already have ptls configured - * (2) determine the highest priority ranking for latency - */ - n_size = mca_ptl_array_get_size(&proc_pml->proc_ptl_next); - for(n_index = 0; n_index < n_size; n_index++) { - struct mca_ptl_proc_t* proc_ptl = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, n_index); - struct mca_ptl_base_module_t* ptl = proc_ptl->ptl; - total_bandwidth += proc_ptl->ptl->ptl_bandwidth; - if(ptl->ptl_latency > latency) - latency = ptl->ptl_latency; - } - - /* (1) set the weight of each ptl as a percentage of overall bandwidth - * (2) copy all ptl instances at the highest priority ranking into the - * list of ptls used for first fragments - */ - - for(n_index = 0; n_index < n_size; n_index++) { - struct mca_ptl_proc_t* proc_ptl = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, n_index); - struct mca_ptl_base_module_t *ptl = proc_ptl->ptl; - double weight; - - /* compute weighting factor for this ptl */ - if(ptl->ptl_bandwidth) - weight = proc_ptl->ptl->ptl_bandwidth / total_bandwidth; - else - weight = 1.0 / n_size; - proc_ptl->ptl_weight = (int)(weight * 100); - - /* - * save/create ptl extension for use by pml - */ - proc_ptl->ptl_base = ptl->ptl_base; - if (NULL == proc_ptl->ptl_base && - ptl->ptl_cache_bytes > 0 && - NULL != ptl->ptl_request_init && - NULL != 
ptl->ptl_request_fini) { - - mca_pml_base_ptl_t* ptl_base = OBJ_NEW(mca_pml_base_ptl_t); - ptl_base->ptl = ptl; - ptl_base->ptl_cache_size = ptl->ptl_cache_size; - proc_ptl->ptl_base = ptl->ptl_base = ptl_base; - } - - /* check to see if this ptl is already in the array of ptls used for first - * fragments - if not add it. - */ - if(ptl->ptl_latency == latency) { - struct mca_ptl_proc_t* proc_new = mca_ptl_array_insert(&proc_pml->proc_ptl_first); - *proc_new = *proc_ptl; - } - - } - } - return OMPI_SUCCESS; -} - -/* - * iterate through each proc and notify any PTLs associated - * with the proc that it is/has gone away - */ - -int mca_pml_teg_del_procs(ompi_proc_t** procs, size_t nprocs) -{ - size_t p; - int rc; - for(p = 0; p < nprocs; p++) { - ompi_proc_t *proc = procs[p]; - mca_pml_teg_proc_t* proc_pml = (mca_pml_teg_proc_t*) proc->proc_pml; - size_t f_index, f_size; - size_t n_index, n_size; - - /* notify each ptl that the proc is going away */ - f_size = mca_ptl_array_get_size(&proc_pml->proc_ptl_first); - for(f_index = 0; f_index < f_size; f_index++) { - mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_index(&proc_pml->proc_ptl_first, f_index); - mca_ptl_base_module_t* ptl = ptl_proc->ptl; - - rc = ptl->ptl_del_procs(ptl, 1, &proc, &ptl_proc->ptl_peer); - if(OMPI_SUCCESS != rc) { - return rc; - } - - /* remove this from next array so that we dont call it twice w/ - * the same address pointer - */ - n_size = mca_ptl_array_get_size(&proc_pml->proc_ptl_first); - for(n_index = 0; n_index < n_size; n_index++) { - mca_ptl_proc_t* next_proc = mca_ptl_array_get_index(&proc_pml->proc_ptl_next, n_index); - if(next_proc->ptl == ptl) { - memset(next_proc, 0, sizeof(mca_ptl_proc_t)); - break; - } - } - } - - /* notify each ptl that was not in the array of ptls for first fragments */ - n_size = mca_ptl_array_get_size(&proc_pml->proc_ptl_next); - for(n_index = 0; n_index < n_size; n_index++) { - mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_index(&proc_pml->proc_ptl_first, 
n_index); - mca_ptl_base_module_t* ptl = ptl_proc->ptl; - if (ptl != 0) { - rc = ptl->ptl_del_procs(ptl,1,&proc,&ptl_proc->ptl_peer); - if(OMPI_SUCCESS != rc) - return rc; - } - } - - /* do any required cleanup */ - OBJ_RELEASE(proc_pml); - proc->proc_pml = NULL; - } - return OMPI_SUCCESS; -} - -int mca_pml_teg_component_fini(void) -{ - /* FIX */ - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/teg/pml_teg.h b/ompi/mca/pml/teg/pml_teg.h deleted file mode 100644 index b9cffb0dfc..0000000000 --- a/ompi/mca/pml/teg/pml_teg.h +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$
- *
- * Additional copyrights may follow
- *
- * $HEADER$
- */
-/**
- *  @file
- */
-
-#ifndef MCA_PML_TEG_H
-#define MCA_PML_TEG_H
-
-#include "opal/threads/threads.h"
-#include "opal/threads/condition.h"
-#include "ompi/class/ompi_free_list.h"
-#include "opal/util/cmd_line.h"
-#include "ompi/request/request.h"
-#include "ompi/mca/pml/pml.h"
-#include "ompi/mca/pml/base/pml_base_request.h"
-#include "ompi/mca/pml/base/pml_base_bsend.h"
-#include "ompi/mca/ptl/base/ptl_base_sendreq.h"
-#include "ompi/mca/ptl/ptl.h"
-
-
-#if defined(c_plusplus) || defined(__cplusplus)
-extern "C" {
-#endif
-/**
- * TEG PML module
- *
- * One global instance (mca_pml_teg) holds the PML entry points plus
- * the bookkeeping for the PTLs the PML drives.
- */
-
-struct mca_pml_teg_t {
-    mca_pml_base_module_t super;
-
-    mca_ptl_base_component_t **teg_ptl_components; /* distinct PTL components */
-    size_t teg_num_ptl_components;                 /* entries used in teg_ptl_components */
-
-    mca_ptl_base_module_t** teg_ptl_modules;       /* PTL modules, sorted by exclusivity */
-    size_t teg_num_ptl_modules;                    /* entries used in teg_ptl_modules */
-
-    mca_ptl_base_component_progress_fn_t* teg_ptl_progress; /* progress functions of PTLs in use */
-    size_t teg_num_ptl_progress;                   /* entries used in teg_ptl_progress */
-
-    opal_list_t  teg_procs;
-    opal_mutex_t teg_lock;
-
-    int teg_priority;        /* value of the "priority" MCA parameter */
-
-    int teg_free_list_num;   /* initial size of free list */
-    int teg_free_list_max;   /* maximum size of free list */
-    int teg_free_list_inc;   /* number of elements to grow free list */
-    int teg_poll_iterations; /* number of iterations to poll for completion */
-
-    /* free list of requests */
-    ompi_free_list_t teg_send_requests;
-    ompi_free_list_t teg_recv_requests;
-
-    /* list of pending send requests */
-    opal_list_t teg_send_pending;
-};
-typedef struct mca_pml_teg_t mca_pml_teg_t;
-
-extern mca_pml_teg_t mca_pml_teg;
-
-
-/*
- * PML module functions.
- */
-
-
-extern int mca_pml_teg_component_open(void);
-extern int mca_pml_teg_component_close(void);
-
-extern mca_pml_base_module_t* mca_pml_teg_component_init(
-    int *priority,
-    bool enable_progress_threads,
-    bool enable_mpi_threads
-);
-
-extern int mca_pml_teg_component_fini(void);
-
-
-
-/*
- * PML interface functions.
- */
-
-extern int mca_pml_teg_add_comm(
-    struct ompi_communicator_t* comm
-);
-
-extern int mca_pml_teg_del_comm(
-    struct ompi_communicator_t* comm
-);
-
-extern int mca_pml_teg_add_procs(
-    struct ompi_proc_t **procs,
-    size_t nprocs
-);
-
-extern int mca_pml_teg_del_procs(
-    struct ompi_proc_t **procs,
-    size_t nprocs
-);
-
-extern int mca_pml_teg_enable(
-    bool enable
-);
-
-extern int mca_pml_teg_progress(void);
-
-extern int mca_pml_teg_iprobe(
-    int dst,
-    int tag,
-    struct ompi_communicator_t* comm,
-    int *matched,
-    ompi_status_public_t* status
-);
-
-extern int mca_pml_teg_probe(
-    int dst,
-    int tag,
-    struct ompi_communicator_t* comm,
-    ompi_status_public_t* status
-);
-
-extern int mca_pml_teg_cancelled(
-    ompi_request_t* request,
-    int *flag
-);
-
-
-extern int mca_pml_teg_isend_init(
-    void *buf,
-    size_t count,
-    struct ompi_datatype_t *datatype,
-    int dst,
-    int tag,
-    mca_pml_base_send_mode_t mode,
-    struct ompi_communicator_t* comm,
-    struct ompi_request_t **request
-);
-
-extern int mca_pml_teg_isend(
-    void *buf,
-    size_t count,
-    struct ompi_datatype_t *datatype,
-    int dst,
-    int tag,
-    mca_pml_base_send_mode_t mode,
-    struct ompi_communicator_t* comm,
-    struct ompi_request_t **request
-);
-
-extern int mca_pml_teg_send(
-    void *buf,
-    size_t count,
-    struct ompi_datatype_t *datatype,
-    int dst,
-    int tag,
-    mca_pml_base_send_mode_t mode,
-    struct ompi_communicator_t* comm
-);
-
-extern int mca_pml_teg_irecv_init(
-    void *buf,
-    size_t count,
-    struct ompi_datatype_t *datatype,
-    int src,
-    int tag,
-    struct ompi_communicator_t* comm,
-    struct ompi_request_t **request
-);
-
-extern int mca_pml_teg_irecv(
-    void *buf,
-    size_t count,
-    struct ompi_datatype_t *datatype,
-    int src,
-    int tag,
-    struct ompi_communicator_t* comm,
-    struct ompi_request_t **request
-);
-
-extern int mca_pml_teg_recv(
-    void *buf,
-    size_t count,
-    struct ompi_datatype_t *datatype,
-    int src,
-    int tag,
-    struct ompi_communicator_t* comm,
-    ompi_status_public_t* status
-);
-
-/* NOTE(review): second declaration of mca_pml_teg_progress in this header; redundant but harmless */
-extern int mca_pml_teg_progress(void);
-
-extern int mca_pml_teg_start(
-    size_t count,
-    ompi_request_t** requests
-);
-
-#if defined(c_plusplus) || defined(__cplusplus)
-}
-#endif
-/*
- * Finish a request once the caller is done with it. `request` is a
- * pointer to the request pointer.
- *  - non-persistent request: release it through MCA_PML_TEG_FREE;
- *  - persistent request: release it only if req_free_called is set,
- *    otherwise just mark it OMPI_REQUEST_INACTIVE.
- * The macro expands to a bare brace block and evaluates `request`
- * more than once.
- */
-#define MCA_PML_TEG_FINI(request) \
-{ \
-    mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)(request); \
-    if(pml_request->req_persistent) { \
-       if(pml_request->req_free_called) { \
-           MCA_PML_TEG_FREE(request); \
-       } else { \
-           pml_request->req_ompi.req_state = OMPI_REQUEST_INACTIVE; \
-       } \
-    } else { \
-        MCA_PML_TEG_FREE(request); \
-    } \
-}
-
-
-/*
- * Mark a request as freed and set the caller's handle to
- * MPI_REQUEST_NULL. The request itself is handed back through the
- * send/recv *_REQUEST_RETURN macros (defined elsewhere) only when
- * req_pml_complete is already true. For a send request the macro
- * first spins until req_lock drops to zero, and a buffered send is
- * passed to mca_pml_base_bsend_request_fini() before being returned.
- */
-#define MCA_PML_TEG_FREE(request) \
-{ \
-    mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)(request); \
-    pml_request->req_free_called = true; \
-    if( pml_request->req_pml_complete == true) \
-    { \
-        switch(pml_request->req_type) { \
-        case MCA_PML_REQUEST_SEND: \
-            { \
-            mca_pml_teg_send_request_t* sendreq = (mca_pml_teg_send_request_t*)pml_request; \
-            while(sendreq->req_lock > 0); \
-            if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
-                mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); \
-            } \
-            MCA_PML_TEG_SEND_REQUEST_RETURN(sendreq); \
-            break; \
-            } \
-        case MCA_PML_REQUEST_RECV: \
-            { \
-            mca_pml_teg_recv_request_t* recvreq = (mca_pml_teg_recv_request_t*)pml_request; \
-            MCA_PML_TEG_RECV_REQUEST_RETURN(recvreq); \
-            break; \
-            } \
-        default: \
-            break; \
-        } \
-    } \
-    *(request) = MPI_REQUEST_NULL; \
-}
-
-#endif
-
diff --git a/ompi/mca/pml/teg/pml_teg_cancel.c b/ompi/mca/pml/teg/pml_teg_cancel.c
deleted file mode 100644
index f7cd8afde5..0000000000
--- a/ompi/mca/pml/teg/pml_teg_cancel.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
- *                         University Research and Technology
- *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2005 The University of Tennessee and The University
- *                         of Tennessee Research Foundation.  All rights
- *                         reserved.
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_teg.h" - -int mca_pml_teg_cancelled(ompi_request_t* request, int* flag) -{ - if(NULL != flag) - *flag = (true == request->req_status._cancelled ? 1 : 0); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/teg/pml_teg_component.c b/ompi/mca/pml/teg/pml_teg_component.c deleted file mode 100644 index 12df850c3d..0000000000 --- a/ompi/mca/pml/teg/pml_teg_component.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$
- *
- * Additional copyrights may follow
- *
- * $HEADER$
- */
-
-#include "ompi_config.h"
-
-#include "opal/event/event.h"
-#include "mpi.h"
-#include "ompi/mca/pml/pml.h"
-#include "ompi/mca/ptl/ptl.h"
-#include "opal/mca/base/mca_base_param.h"
-#include "ompi/mca/pml/base/pml_base_bsend.h"
-#include "ompi/mca/ptl/base/base.h"
-#include "pml_teg.h"
-#include "pml_teg_proc.h"
-#include "pml_teg_sendreq.h"
-#include "pml_teg_recvreq.h"
-
-
-mca_pml_base_component_1_0_0_t mca_pml_teg_component = {
-
-    /* First, the mca_base_component_t struct containing meta
-       information about the component itself */
-
-    {
-      /* Indicate that we are a pml v1.0.0 component (which also implies
-         a specific MCA version) */
-
-      MCA_PML_BASE_VERSION_1_0_0,
-
-      "teg", /* MCA component name */
-      OMPI_MAJOR_VERSION,  /* MCA component major version */
-      OMPI_MINOR_VERSION,  /* MCA component minor version */
-      OMPI_RELEASE_VERSION,  /* MCA component release version */
-      mca_pml_teg_component_open,  /* component open */
-      mca_pml_teg_component_close  /* component close */
-    },
-
-    /* Next the MCA v1.0.0 component meta data */
-
-    {
-      /* Whether the component is checkpointable or not */
-      false
-    },
-
-    mca_pml_teg_component_init,  /* component init */
-    mca_pml_teg_component_fini   /* component finalize */
-};
-
-
-
-/**
- * Register the integer MCA parameter pml_teg_<param_name> and return
- * its current value.
- *
- * @param param_name    (IN)  Parameter name within the "pml"/"teg" scope.
- * @param default_value (IN)  Value registered as the default; it is also
- *                            what is returned if the lookup stores nothing.
- */
-static inline int mca_pml_teg_param_register_int(
-    const char* param_name,
-    int default_value)
-{
-    int id = mca_base_param_register_int("pml","teg",param_name,NULL,default_value);
-    int param_value = default_value;
-    /* the lookup writes through the pointer, so pass the address */
-    mca_base_param_lookup_int(id,&param_value);
-    return param_value;
-}
-
-
-/**
- * Component open: construct the module's objects, clear the PTL
- * bookkeeping, read the MCA parameters and open the PTL framework.
- *
- * @return The status of mca_ptl_base_open().
- */
-int mca_pml_teg_component_open(void)
-{
-    OBJ_CONSTRUCT(&mca_pml_teg.teg_lock, opal_mutex_t);
-    OBJ_CONSTRUCT(&mca_pml_teg.teg_send_requests, ompi_free_list_t);
-    OBJ_CONSTRUCT(&mca_pml_teg.teg_recv_requests, ompi_free_list_t);
-    OBJ_CONSTRUCT(&mca_pml_teg.teg_procs, opal_list_t);
-    OBJ_CONSTRUCT(&mca_pml_teg.teg_send_pending, opal_list_t);
-
-    mca_pml_teg.teg_ptl_components = NULL;
-    mca_pml_teg.teg_num_ptl_components = 0;
-    mca_pml_teg.teg_ptl_modules = NULL;
-    mca_pml_teg.teg_num_ptl_modules = 0;
-    mca_pml_teg.teg_ptl_progress = NULL;
-    mca_pml_teg.teg_num_ptl_progress = 0;
-
-    mca_pml_teg.teg_free_list_num =
-        mca_pml_teg_param_register_int("free_list_num", 4);
-    mca_pml_teg.teg_free_list_max =
-        mca_pml_teg_param_register_int("free_list_max", -1);
-    mca_pml_teg.teg_free_list_inc =
-        mca_pml_teg_param_register_int("free_list_inc", 64);
-    mca_pml_teg.teg_poll_iterations =
-        mca_pml_teg_param_register_int("poll_iterations", 100000);
-    mca_pml_teg.teg_priority =
-        mca_pml_teg_param_register_int("priority", 0);
-
-    return mca_ptl_base_open();
-}
-
-
-/**
- * Component close: if the component was enabled, close the PTL
- * framework, free the PTL bookkeeping arrays and destruct the
- * module's objects.
- *
- * @return OMPI_SUCCESS, or the error reported by mca_ptl_base_close().
- */
-int mca_pml_teg_component_close(void)
-{
-    int rc;
-
-    /* I was not enabled */
-    if( NULL == mca_pml_teg.teg_ptl_components )
-        return OMPI_SUCCESS;
-
-    if(OMPI_SUCCESS != (rc = mca_ptl_base_close()))
-        return rc;
-
-    /* free(NULL) is a no-op, so the arrays need no individual guards */
-    free(mca_pml_teg.teg_ptl_components);
-    mca_pml_teg.teg_ptl_components = NULL;
-    free(mca_pml_teg.teg_ptl_modules);
-    mca_pml_teg.teg_ptl_modules = NULL;
-    free(mca_pml_teg.teg_ptl_progress);
-    mca_pml_teg.teg_ptl_progress = NULL;
-
-    OBJ_DESTRUCT(&mca_pml_teg.teg_send_pending);
-    OBJ_DESTRUCT(&mca_pml_teg.teg_send_requests);
-    OBJ_DESTRUCT(&mca_pml_teg.teg_recv_requests);
-    OBJ_DESTRUCT(&mca_pml_teg.teg_procs);
-    OBJ_DESTRUCT(&mca_pml_teg.teg_lock);
-    return OMPI_SUCCESS;
-}
-
-
-mca_pml_base_module_t* mca_pml_teg_component_init( int* priority,
-                                                   bool enable_progress_threads,
-                                                   bool enable_mpi_threads )
-{
-    int rc;
-    *priority = mca_pml_teg.teg_priority;
-
-    /* buffered send */
-    if(OMPI_SUCCESS != mca_pml_base_bsend_init(enable_mpi_threads)) {
-        opal_output(0, "mca_pml_teg_component_init: mca_pml_bsend_init failed\n");
-        return NULL;
-    }
-
-    rc = mca_ptl_base_select( enable_progress_threads, enable_mpi_threads );
-    if( rc != OMPI_SUCCESS )
- return NULL; - - return &mca_pml_teg.super; -} - diff --git a/ompi/mca/pml/teg/pml_teg_component.h b/ompi/mca/pml/teg/pml_teg_component.h deleted file mode 100644 index e7a668f515..0000000000 --- a/ompi/mca/pml/teg/pml_teg_component.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_TEG_COMPONENT_H -#define MCA_PML_TEG_COMPONENT_H - -/* - * PML module functions. - */ - -OMPI_COMP_EXPORT extern mca_pml_base_component_1_0_0_t mca_pml_teg_component; - -#endif diff --git a/ompi/mca/pml/teg/pml_teg_iprobe.c b/ompi/mca/pml/teg/pml_teg_iprobe.c deleted file mode 100644 index 282da578b2..0000000000 --- a/ompi/mca/pml/teg/pml_teg_iprobe.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "pml_teg_recvreq.h" - - -int mca_pml_teg_iprobe(int src, - int tag, - struct ompi_communicator_t *comm, - int *matched, ompi_status_public_t * status) -{ - int rc; - mca_ptl_base_recv_request_t recvreq; - - OBJ_CONSTRUCT( &(recvreq), mca_ptl_base_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE; - MCA_PML_TEG_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char, src, tag, comm, true); - - *matched = 0; - if ((rc = mca_pml_teg_recv_request_start(&recvreq)) == OMPI_SUCCESS) { - if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) { - if( NULL != status ) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - *matched = 1; - } else { - /* we are supposed to progress ... */ - opal_progress(); - } - } - MCA_PML_BASE_RECV_REQUEST_FINI((&recvreq.req_recv)); - return rc; -} - - -int mca_pml_teg_probe(int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc; - mca_ptl_base_recv_request_t recvreq; - - OBJ_CONSTRUCT( &(recvreq), mca_ptl_base_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE; - MCA_PML_TEG_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char, src, tag, comm, true); - - if ((rc = mca_pml_teg_recv_request_start(&recvreq)) != OMPI_SUCCESS) { - MCA_PML_BASE_RECV_REQUEST_FINI((&recvreq.req_recv)); - return rc; - } - - if (recvreq.req_recv.req_base.req_ompi.req_complete == false) { - /* give up and sleep until completion */ - if (opal_using_threads()) { - opal_mutex_lock(&ompi_request_lock); - ompi_request_waiting++; - while (recvreq.req_recv.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - 
ompi_request_waiting--; - opal_mutex_unlock(&ompi_request_lock); - } else { - ompi_request_waiting++; - while (recvreq.req_recv.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - } - } - - if (NULL != status) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq.req_recv); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/teg/pml_teg_irecv.c b/ompi/mca/pml/teg/pml_teg_irecv.c deleted file mode 100644 index 46b74eca7f..0000000000 --- a/ompi/mca/pml/teg/pml_teg_irecv.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "pml_teg_recvreq.h" - - -int mca_pml_teg_irecv_init(void *addr, - size_t count, - struct ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - int rc; - mca_ptl_base_recv_request_t *recvreq; - MCA_PML_TEG_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_TEG_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, true); - - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - -int mca_pml_teg_irecv(void *addr, - size_t count, - struct ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - int rc; - - mca_ptl_base_recv_request_t *recvreq; - MCA_PML_TEG_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_TEG_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - if ((rc = mca_pml_teg_recv_request_start(recvreq)) != OMPI_SUCCESS) { - MCA_PML_TEG_RECV_REQUEST_RETURN(recvreq); - return rc; - } - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - - -int mca_pml_teg_recv(void *addr, - size_t count, - struct ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc; - mca_ptl_base_recv_request_t *recvreq; - MCA_PML_TEG_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_TEG_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - if ((rc = mca_pml_teg_recv_request_start(recvreq)) != OMPI_SUCCESS) { - goto recv_finish; - } - - if (recvreq->req_recv.req_base.req_ompi.req_complete == false) { - /* give up and sleep until completion */ - if (opal_using_threads()) { - opal_mutex_lock(&ompi_request_lock); - ompi_request_waiting++; - 
while (recvreq->req_recv.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - opal_mutex_unlock(&ompi_request_lock); - } else { - ompi_request_waiting++; - while (recvreq->req_recv.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - } - } - recv_finish: - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - - MCA_PML_TEG_RECV_REQUEST_RETURN(recvreq); - return recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; -} diff --git a/ompi/mca/pml/teg/pml_teg_isend.c b/ompi/mca/pml/teg/pml_teg_isend.c deleted file mode 100644 index 9c400c77a0..0000000000 --- a/ompi/mca/pml/teg/pml_teg_isend.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_teg.h" -#include "pml_teg_proc.h" -#include "pml_teg_sendreq.h" -#include "pml_teg_recvreq.h" - - -int mca_pml_teg_isend_init(void *buf, - size_t count, - struct ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - int rc; - - mca_pml_teg_send_request_t *sendreq; - MCA_PML_TEG_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - - MCA_PML_TEG_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, true); - - *request = (ompi_request_t *) sendreq; - return OMPI_SUCCESS; -} - - -int mca_pml_teg_isend(void *buf, - size_t count, - struct ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - int rc; - mca_pml_teg_send_request_t *sendreq; - MCA_PML_TEG_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - MCA_PML_TEG_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); - - MCA_PML_TEG_SEND_REQUEST_START(sendreq, rc); - *request = (ompi_request_t *) sendreq; - return rc; -} - - -int mca_pml_teg_send(void *buf, - size_t count, - struct ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm) -{ - int rc; - mca_pml_teg_send_request_t *sendreq; - MCA_PML_TEG_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - - MCA_PML_TEG_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); - - MCA_PML_TEG_SEND_REQUEST_START(sendreq, rc); - if (rc != OMPI_SUCCESS) { - MCA_PML_TEG_FREE((ompi_request_t **) & sendreq); - return rc; - } - - if (sendreq->req_send.req_base.req_ompi.req_complete == false) { - /* give up and 
sleep until completion */ - if (opal_using_threads()) { - opal_mutex_lock(&ompi_request_lock); - ompi_request_waiting++; - while (sendreq->req_send.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - opal_mutex_unlock(&ompi_request_lock); - } else { - ompi_request_waiting++; - while (sendreq->req_send.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - } - } - - /* return request to pool */ - MCA_PML_TEG_FREE((ompi_request_t **) & sendreq); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/teg/pml_teg_proc.c b/ompi/mca/pml/teg/pml_teg_proc.c deleted file mode 100644 index 1f224b5182..0000000000 --- a/ompi/mca/pml/teg/pml_teg_proc.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/sys/atomic.h" -#include "pml_teg.h" -#include "pml_teg_proc.h" -#include "pml_ptl_array.h" - - -static void mca_pml_teg_proc_construct(mca_pml_teg_proc_t* proc) -{ - proc->base.proc_ompi = NULL; - proc->proc_ptl_flags = 0; - OBJ_CONSTRUCT(&proc->base.proc_lock, opal_mutex_t); - OBJ_CONSTRUCT(&proc->proc_ptl_first, mca_pml_teg_ptl_array_t); - OBJ_CONSTRUCT(&proc->proc_ptl_next, mca_pml_teg_ptl_array_t); - - OPAL_THREAD_LOCK(&mca_pml_teg.teg_lock); - opal_list_append(&mca_pml_teg.teg_procs, (opal_list_item_t*)proc); - OPAL_THREAD_UNLOCK(&mca_pml_teg.teg_lock); -} - - -static void mca_pml_teg_proc_destruct(mca_pml_teg_proc_t* proc) -{ - OPAL_THREAD_LOCK(&mca_pml_teg.teg_lock); - opal_list_remove_item(&mca_pml_teg.teg_procs, (opal_list_item_t*)proc); - OPAL_THREAD_UNLOCK(&mca_pml_teg.teg_lock); - - OBJ_DESTRUCT(&proc->base.proc_lock); - OBJ_DESTRUCT(&proc->proc_ptl_first); - OBJ_DESTRUCT(&proc->proc_ptl_next); -} - -OBJ_CLASS_INSTANCE( - mca_pml_teg_proc_t, - opal_list_item_t, - mca_pml_teg_proc_construct, - mca_pml_teg_proc_destruct -); - diff --git a/ompi/mca/pml/teg/pml_teg_proc.h b/ompi/mca/pml/teg/pml_teg_proc.h deleted file mode 100644 index af8904762e..0000000000 --- a/ompi/mca/pml/teg/pml_teg_proc.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_PROC_H -#define MCA_PML_PROC_H - -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/group/group.h" -#include "ompi/proc/proc.h" -#include "pml_ptl_array.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -/** - * Structure associated w/ ompi_proc_t that contains data specific - * to the PML. Note that this name is not PML specific. - */ -struct mca_pml_teg_proc_t { - mca_pml_proc_t base; - mca_ptl_array_t proc_ptl_first; /**< array of ptls to use for first fragments */ - mca_ptl_array_t proc_ptl_next; /**< array of ptls to use for remaining fragments */ - uint32_t proc_ptl_flags; /**< aggregate ptl flags */ -}; -typedef struct mca_pml_teg_proc_t mca_pml_teg_proc_t; - - -OMPI_COMP_EXPORT extern opal_class_t mca_pml_teg_proc_t_class; - -/** - * Return the mca_pml_proc_t instance cached in the communicators local group. - * - * @param comm Communicator - * @param rank Peer rank - * @return mca_pml_proc_t instance - */ - -static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_local(ompi_communicator_t* comm, int rank) -{ - ompi_proc_t* proc = comm->c_local_group->grp_proc_pointers[rank]; - return proc->proc_pml; -} - -/** - * Return the mca_pml_proc_t instance cached on the communicators remote group. - * - * @param comm Communicator - * @param rank Peer rank - * @return mca_pml_proc_t instance - */ - -static inline mca_pml_proc_t* mca_pml_teg_proc_lookup_remote(ompi_communicator_t* comm, int rank) -{ - ompi_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank]; - return proc->proc_pml; -} - -/** - * Return the mca_ptl_peer_t instance corresponding to the process/ptl combination. 
- * - * @param comm Communicator - * @param rank Peer rank - * @return mca_pml_proc_t instance - */ - -static inline struct mca_ptl_base_peer_t* mca_pml_teg_proc_lookup_remote_peer( - ompi_communicator_t* comm, - int rank, - struct mca_ptl_base_module_t* ptl) -{ - ompi_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank]; - mca_pml_teg_proc_t* proc_pml =(mca_pml_teg_proc_t*) proc->proc_pml; - size_t i, size = mca_ptl_array_get_size(&proc_pml->proc_ptl_first); - mca_ptl_proc_t* proc_ptl = proc_pml->proc_ptl_first.ptl_procs; - for(i = 0; i < size; i++) { - if(proc_ptl->ptl == ptl) { - return proc_ptl->ptl_peer; - } - proc_ptl++; - } - return NULL; -} - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/pml/teg/pml_teg_progress.c b/ompi/mca/pml/teg/pml_teg_progress.c deleted file mode 100644 index 9974184251..0000000000 --- a/ompi/mca/pml/teg/pml_teg_progress.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_teg.h" -#include "pml_teg_sendreq.h" - - -int mca_pml_teg_progress(void) -{ - mca_ptl_tstamp_t tstamp = 0; - size_t i; - int count = 0; - - /* - * Progress each of the PTL modules - */ - for(i=0; i 0) { - count += rc; - } - } - return count; -} - diff --git a/ompi/mca/pml/teg/pml_teg_ptl.c b/ompi/mca/pml/teg/pml_teg_ptl.c deleted file mode 100644 index 5b88a9915f..0000000000 --- a/ompi/mca/pml/teg/pml_teg_ptl.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_teg_ptl.h" - - -static void mca_pml_base_ptl_construct(mca_pml_base_ptl_t* ptl) -{ - OBJ_CONSTRUCT(&ptl->ptl_cache, opal_list_t); - OBJ_CONSTRUCT(&ptl->ptl_cache_lock, opal_mutex_t); - ptl->ptl = NULL; - ptl->ptl_cache_size = 0; - ptl->ptl_cache_alloc = 0; -} - -static void mca_pml_base_ptl_destruct(mca_pml_base_ptl_t* ptl) -{ - OBJ_DESTRUCT(&ptl->ptl_cache); - OBJ_DESTRUCT(&ptl->ptl_cache_lock); -} - -OBJ_CLASS_INSTANCE( - mca_pml_base_ptl_t, - opal_list_t, - mca_pml_base_ptl_construct, - mca_pml_base_ptl_destruct -); - diff --git a/ompi/mca/pml/teg/pml_teg_ptl.h b/ompi/mca/pml/teg/pml_teg_ptl.h deleted file mode 100644 index 15e7442f2e..0000000000 --- a/ompi/mca/pml/teg/pml_teg_ptl.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _MCA_PML_BASE_PTL_ -#define _MCA_PML_BASE_PTL_ - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "opal/threads/condition.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - -struct mca_pml_base_ptl_t { - opal_list_t ptl_cache; /**< cache of send requests */ - size_t ptl_cache_size; /**< maximum size of cache */ - size_t ptl_cache_alloc; /**< current number of allocated items */ - opal_mutex_t ptl_cache_lock; /**< lock for queue access */ - struct mca_ptl_base_module_t* ptl; /**< back pointer to ptl */ -}; -typedef struct mca_pml_base_ptl_t mca_pml_base_ptl_t; - -OBJ_CLASS_DECLARATION(mca_pml_base_ptl_t); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif - diff --git a/ompi/mca/pml/teg/pml_teg_recvfrag.c b/ompi/mca/pml/teg/pml_teg_recvfrag.c deleted file mode 100644 index 1d66c81d75..0000000000 --- a/ompi/mca/pml/teg/pml_teg_recvfrag.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - */ - -#include "ompi_config.h" - -#include "ompi/mca/pml/pml.h" -#include "pml_teg_recvfrag.h" -#include "pml_teg_proc.h" - - -OMPI_DECLSPEC extern opal_class_t mca_ptl_base_recv_frag_t_class; - - -/** - * Called by the PTL to match attempt a match for new fragments. 
- * - * @param ptl (IN) The PTL pointer - * @param frag (IN) Receive fragment descriptor. - * @param header (IN) Header corresponding to the receive fragment. - * @return OMPI_SUCCESS or error status on failure. - */ -bool mca_pml_teg_recv_frag_match( - mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag, - mca_ptl_base_match_header_t* header) -{ - bool matched; - bool matches = false; - opal_list_t matched_frags; - if((matched = mca_ptl_base_match(header, frag, &matched_frags, &matches)) == false) { - frag = (matches ? (mca_ptl_base_recv_frag_t*)opal_list_remove_first(&matched_frags) : NULL); - } - - while(NULL != frag) { - mca_ptl_base_module_t* ptl = frag->frag_base.frag_owner; - mca_ptl_base_recv_request_t *request = frag->frag_request; - mca_ptl_base_match_header_t *header = &frag->frag_base.frag_header.hdr_match; - - /* - * Initialize request status. - */ - /* TODO request->req_recv.req_bytes_packed = header->hdr_msg_length; */ - request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = header->hdr_src; - request->req_recv.req_base.req_ompi.req_status.MPI_TAG = header->hdr_tag; - - /* - * If probe - signal request is complete - but don't notify PTL - */ - if(request->req_recv.req_base.req_type == MCA_PML_REQUEST_PROBE) { - - ptl->ptl_recv_progress( ptl, - request, - header->hdr_msg_length, - header->hdr_msg_length ); - matched = mca_pml_teg_recv_frag_match( ptl, frag, header ); - - } else { - - /* if required - setup pointer to ptls peer */ - if (NULL == frag->frag_base.frag_peer) { - frag->frag_base.frag_peer = mca_pml_teg_proc_lookup_remote_peer( - request->req_recv.req_base.req_comm,header->hdr_src,ptl); - } - - MCA_PML_TEG_RECV_MATCHED( ptl, frag ); - }; - - /* process any additional fragments that arrived out of order */ - frag = (matches ? 
(mca_ptl_base_recv_frag_t*)opal_list_remove_first(&matched_frags) : NULL); - }; - return matched; -} - - diff --git a/ompi/mca/pml/teg/pml_teg_recvfrag.h b/ompi/mca/pml/teg/pml_teg_recvfrag.h deleted file mode 100644 index c3f9255f66..0000000000 --- a/ompi/mca/pml/teg/pml_teg_recvfrag.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_TEG_RECVFRAG_H -#define MCA_PML_TEG_RECVFRAG_H - -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" - -/** - * Called by the PTL to match attempt a match for new fragments. - * - * @param ptl (IN) The PTL pointer - * @param frag (IN) Receive fragment descriptor. - * @param header (IN) Header corresponding to the receive fragment. - * @return OMPI_SUCCESS or error status on failure. - */ -bool mca_pml_teg_recv_frag_match( - mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag, - mca_ptl_base_match_header_t* header -); - -#define MCA_PML_TEG_RECV_MATCHED( ptl, frag ) \ -do { \ - mca_pml_base_recv_request_t* _request = (mca_pml_base_recv_request_t*)(frag)->frag_request; \ - /* Now that we have the sender we can create the convertor. Additionally, we know */ \ - /* that the required convertor should start at the position zero as we just match */ \ - /* the first fragment. 
*/ \ - if( 0 != (_request)->req_bytes_packed ) { \ - (_request)->req_base.req_proc = ompi_comm_peer_lookup( \ - (_request)->req_base.req_comm, \ - frag->frag_base.frag_header.hdr_match.hdr_src); \ - ompi_convertor_copy_and_prepare_for_recv( \ - (_request)->req_base.req_proc->proc_convertor, \ - (_request)->req_base.req_datatype, \ - (_request)->req_base.req_count, \ - (_request)->req_base.req_addr, \ - &((_request)->req_convertor) ); \ - } \ - ptl->ptl_matched( (ptl), (frag) ); /* notify ptl of match */ \ -} while (0) - -#endif - diff --git a/ompi/mca/pml/teg/pml_teg_recvreq.c b/ompi/mca/pml/teg/pml_teg_recvreq.c deleted file mode 100644 index b2a2429ae0..0000000000 --- a/ompi/mca/pml/teg/pml_teg_recvreq.c +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_comm.h" -#include "pml_teg_recvreq.h" -#include "pml_teg_recvfrag.h" -#include "pml_teg_sendreq.h" - -static mca_ptl_base_recv_frag_t* mca_pml_teg_recv_request_match_specific_proc( - mca_ptl_base_recv_request_t* request, int proc); - -static int mca_pml_teg_recv_request_fini(struct ompi_request_t** request) -{ - MCA_PML_TEG_FINI(request); - return OMPI_SUCCESS; -} - -static int mca_pml_teg_recv_request_free(struct ompi_request_t** request) -{ - MCA_PML_TEG_FREE(request); - return OMPI_SUCCESS; -} - -static int mca_pml_teg_recv_request_cancel(struct ompi_request_t* request, int complete) -{ - mca_pml_base_request_t* teg_request = (mca_pml_base_request_t*)request; - ompi_communicator_t* ompi_comm = teg_request->req_comm; - mca_pml_ptl_comm_t* pml_comm = (mca_pml_ptl_comm_t*)ompi_comm->c_pml_comm; - - if( true == request->req_complete ) { /* way to late to cancel this one */ - return OMPI_SUCCESS; - } - - /* The rest should be protected behind the match logic lock */ - OPAL_THREAD_LOCK(&pml_comm->c_matching_lock); - - if( OMPI_ANY_TAG == request->req_status.MPI_TAG ) { /* the match have not been already done */ - - if( teg_request->req_peer == OMPI_ANY_SOURCE ) { - opal_list_remove_item( &(pml_comm->c_wild_receives), - (opal_list_item_t*)request ); - } else { - opal_list_remove_item( pml_comm->c_specific_receives + teg_request->req_peer, - (opal_list_item_t*)request ); - } - } - - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); - - request->req_status._cancelled = true; - request->req_complete = true; /* mark it as completed so all the test/wait functions - * on this particular request will finish */ - /* Now we have a problem if we are in a multi-threaded environment. 
We shou ld - * broadcast the condition on the request in order to allow the other threa ds - * to complete their test/wait functions. - */ - ompi_request_completed++; - if(ompi_request_waiting) { - opal_condition_broadcast(&ompi_request_cond); - } - return OMPI_SUCCESS; -} - -static void mca_pml_teg_recv_request_construct(mca_ptl_base_recv_request_t* request) -{ - request->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; - request->req_recv.req_base.req_ompi.req_fini = mca_pml_teg_recv_request_fini; - request->req_recv.req_base.req_ompi.req_free = mca_pml_teg_recv_request_free; - request->req_recv.req_base.req_ompi.req_cancel = mca_pml_teg_recv_request_cancel; -} - -static void mca_pml_teg_recv_request_destruct(mca_ptl_base_recv_request_t* request) -{ -} - -OBJ_CLASS_INSTANCE( - mca_pml_teg_recv_request_t, - mca_ptl_base_recv_request_t, - mca_pml_teg_recv_request_construct, - mca_pml_teg_recv_request_destruct); - - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. - */ - -void mca_pml_teg_recv_request_progress( - struct mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_request_t* req, - size_t bytes_received, - size_t bytes_delivered) -{ - OPAL_THREAD_LOCK(&ompi_request_lock); - req->req_bytes_received += bytes_received; - req->req_bytes_delivered += bytes_delivered; - if (req->req_bytes_received >= req->req_recv.req_bytes_packed) { - /* initialize request status */ - req->req_recv.req_base.req_ompi.req_status._count = req->req_bytes_delivered; - req->req_recv.req_base.req_pml_complete = true; - req->req_recv.req_base.req_ompi.req_complete = true; - ompi_request_completed++; - if(ompi_request_waiting) { - opal_condition_broadcast(&ompi_request_cond); - } - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); -} - - - -/* - * This routine is used to match a posted receive when the source process - * is specified. 
-*/ - -void mca_pml_teg_recv_request_match_specific(mca_ptl_base_recv_request_t* request) -{ - ompi_communicator_t *comm = request->req_recv.req_base.req_comm; - mca_pml_ptl_comm_t* pml_comm = comm->c_pml_comm; - int req_peer = request->req_recv.req_base.req_peer; - mca_ptl_base_recv_frag_t* frag; - - /* check for a specific match */ - OPAL_THREAD_LOCK(&pml_comm->c_matching_lock); - - /* assign sequence number */ - request->req_recv.req_base.req_sequence = pml_comm->c_recv_seq++; - - if (opal_list_get_size(&pml_comm->c_unexpected_frags[req_peer]) > 0 && - (frag = mca_pml_teg_recv_request_match_specific_proc(request, req_peer)) != NULL) { - mca_ptl_base_module_t* ptl = frag->frag_base.frag_owner; - /* setup pointer to ptls peer */ - if(NULL == frag->frag_base.frag_peer) - frag->frag_base.frag_peer = mca_pml_teg_proc_lookup_remote_peer(comm,req_peer,ptl); - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); - if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || - (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { - MCA_PML_TEG_RECV_MATCHED( ptl, frag ); - } - return; /* match found */ - } - - /* We didn't find any matches. Record this irecv so we can match - * it when the message comes in. - */ - if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) { - opal_list_append(pml_comm->c_specific_receives+req_peer, (opal_list_item_t*)request); - } - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); -} - - -/* - * this routine is used to try and match a wild posted receive - where - * wild is determined by the value assigned to the source process -*/ - -void mca_pml_teg_recv_request_match_wild(mca_ptl_base_recv_request_t* request) -{ - ompi_communicator_t *comm = request->req_recv.req_base.req_comm; - mca_pml_ptl_comm_t* pml_comm = comm->c_pml_comm; - int proc_count = comm->c_remote_group->grp_proc_count; - int proc; - - /* - * Loop over all the outstanding messages to find one that matches. 
- * There is an outer loop over lists of messages from each - * process, then an inner loop over the messages from the - * process. - */ - OPAL_THREAD_LOCK(&pml_comm->c_matching_lock); - - /* assign sequence number */ - request->req_recv.req_base.req_sequence = pml_comm->c_recv_seq++; - - for (proc = 0; proc < proc_count; proc++) { - mca_ptl_base_recv_frag_t* frag; - - /* continue if no frags to match */ - if (opal_list_get_size(&pml_comm->c_unexpected_frags[proc]) == 0) - continue; - - /* loop over messages from the current proc */ - if ((frag = mca_pml_teg_recv_request_match_specific_proc(request, proc)) != NULL) { - mca_ptl_base_module_t* ptl = frag->frag_base.frag_owner; - /* if required - setup pointer to ptls peer */ - if(NULL == frag->frag_base.frag_peer) - frag->frag_base.frag_peer = mca_pml_teg_proc_lookup_remote_peer(comm,proc,ptl); - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); - if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || - (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { - MCA_PML_TEG_RECV_MATCHED( ptl, frag ); - } - return; /* match found */ - } - } - - /* We didn't find any matches. Record this irecv so we can match to - * it when the message comes in. - */ - - if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) - opal_list_append(&pml_comm->c_wild_receives, (opal_list_item_t*)request); - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); -} - - -/* - * this routine tries to match a posted receive. If a match is found, - * it places the request in the appropriate matched receive list. 
-*/ - -static mca_ptl_base_recv_frag_t* mca_pml_teg_recv_request_match_specific_proc( - mca_ptl_base_recv_request_t* request, int proc) -{ - mca_pml_ptl_comm_t *pml_comm = request->req_recv.req_base.req_comm->c_pml_comm; - opal_list_t* unexpected_frags = pml_comm->c_unexpected_frags+proc; - mca_ptl_base_recv_frag_t* frag; - mca_ptl_base_match_header_t* header; - int tag = request->req_recv.req_base.req_tag; - - if( OMPI_ANY_TAG == tag ) { - for (frag = (mca_ptl_base_recv_frag_t*)opal_list_get_first(unexpected_frags); - frag != (mca_ptl_base_recv_frag_t*)opal_list_get_end(unexpected_frags); - frag = (mca_ptl_base_recv_frag_t*)opal_list_get_next(frag)) { - header = &(frag->frag_base.frag_header.hdr_match); - - /* check first frag - we assume that process matching has been done already */ - if( header->hdr_tag >= 0 ) { - goto find_fragment; - } - } - } else { - for (frag = (mca_ptl_base_recv_frag_t*)opal_list_get_first(unexpected_frags); - frag != (mca_ptl_base_recv_frag_t*)opal_list_get_end(unexpected_frags); - frag = (mca_ptl_base_recv_frag_t*)opal_list_get_next(frag)) { - header = &(frag->frag_base.frag_header.hdr_match); - - /* check first frag - we assume that process matching has been done already */ - if ( tag == header->hdr_tag ) { - /* we assume that the tag is correct from MPI point of view (ie. 
>= 0 ) */ - goto find_fragment; - } - } - } - return NULL; - find_fragment: - request->req_recv.req_bytes_packed = header->hdr_msg_length; - request->req_recv.req_base.req_ompi.req_status.MPI_TAG = header->hdr_tag; - request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = header->hdr_src; - - if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || - (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { - opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag); - frag->frag_request = request; - } else { - /* it's a probe, therefore report it's completion */ - mca_pml_teg_recv_request_progress( NULL, request, header->hdr_msg_length, header->hdr_msg_length ); - } - return frag; -} - diff --git a/ompi/mca/pml/teg/pml_teg_recvreq.h b/ompi/mca/pml/teg/pml_teg_recvreq.h deleted file mode 100644 index c4159481a4..0000000000 --- a/ompi/mca/pml/teg/pml_teg_recvreq.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef OMPI_PML_TEG_RECV_REQUEST_H -#define OMPI_PML_TEG_RECV_REQUEST_H - -#include "pml_teg.h" -#include "pml_teg_proc.h" -#include "ompi/mca/ptl/base/ptl_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -typedef mca_ptl_base_recv_request_t mca_pml_teg_recv_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_teg_recv_request_t); - - -/** - * Allocate a recv request from the modules free list. - * - * @param rc (OUT) OMPI_SUCCESS or error status on failure. - * @return Receive request. - */ -#define MCA_PML_TEG_RECV_REQUEST_ALLOC(recvreq, rc) \ - do { \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_GET(&mca_pml_teg.teg_recv_requests, item, rc); \ - recvreq = (mca_ptl_base_recv_request_t*)item; \ - } while(0) - - -/** - * Initialize a recv request. - */ -#define MCA_PML_TEG_RECV_REQUEST_INIT( \ - request, \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent) \ -{ \ - MCA_PML_BASE_RECV_REQUEST_INIT( \ - (&(request)->req_recv), \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent \ - ); \ -} - - -/** - * Return a recv request to the modules free list. - * - * @param request (IN) Receive request. - */ -#define MCA_PML_TEG_RECV_REQUEST_RETURN(request) \ - do { \ - MCA_PML_BASE_RECV_REQUEST_FINI( &request->req_recv ); \ - OMPI_FREE_LIST_RETURN(&mca_pml_teg.teg_recv_requests, (opal_list_item_t*)request); \ - } while(0) - -/** - * Attempt to match the request against the unexpected fragment list - * for all source ranks w/in the communicator. - * - * @param request (IN) Request to match. - */ -void mca_pml_teg_recv_request_match_wild(mca_ptl_base_recv_request_t* request); - -/** - * Attempt to match the request against the unexpected fragment list - * for a specific source rank. - * - * @param request (IN) Request to match. 
- */ -void mca_pml_teg_recv_request_match_specific(mca_ptl_base_recv_request_t* request); - -/** - * Start an initialized request. - * - * @param request Receive request. - * @return OMPI_SUCESS or error status on failure. - */ -static inline int mca_pml_teg_recv_request_start(mca_ptl_base_recv_request_t* request) -{ - /* init/re-init the request */ - request->req_bytes_received = 0; - request->req_bytes_delivered = 0; - request->req_recv.req_base.req_pml_complete = false; - request->req_recv.req_base.req_ompi.req_complete = false; - request->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; - /* always set the req_status.MPI_TAG to ANY_TAG before starting the request. This field - * is used on the cancel part in order to find out if the request has been matched or not. - */ - request->req_recv.req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG; - request->req_recv.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; - request->req_recv.req_base.req_ompi.req_status._cancelled = 0; - - /* attempt to match posted recv */ - if(request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { - mca_pml_teg_recv_request_match_wild(request); - } else { - mca_pml_teg_recv_request_match_specific(request); - } - return OMPI_SUCCESS; -} - -/** - * Update status of a recv request based on the completion status of - * the receive fragment. - * - * @param ptl (IN) The PTL pointer. - * @param request (IN) Receive request. - * @param bytes_received (IN) Bytes received from peer. - * @param bytes_delivered (IN) Bytes delivered to application. 
- */ -void mca_pml_teg_recv_request_progress( - struct mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_request_t* request, - size_t bytes_received, - size_t bytes_delivered -); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/pml/teg/pml_teg_sendreq.c b/ompi/mca/pml/teg/pml_teg_sendreq.c deleted file mode 100644 index 0b9455d2d8..0000000000 --- a/ompi/mca/pml/teg/pml_teg_sendreq.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "pml_teg.h" -#include "pml_teg_proc.h" -#include "pml_teg_sendreq.h" -#include "pml_teg_recvreq.h" - - - -static int mca_pml_teg_send_request_fini(struct ompi_request_t** request) -{ - MCA_PML_TEG_FINI(request); - return OMPI_SUCCESS; -} - -static int mca_pml_teg_send_request_free(struct ompi_request_t** request) -{ - MCA_PML_TEG_FREE(request); - return OMPI_SUCCESS; -} - -static int mca_pml_teg_send_request_cancel(struct ompi_request_t* request, int complete) -{ - /* we dont cancel send requests by now */ - return OMPI_SUCCESS; -} - - -static void mca_pml_teg_send_request_construct(mca_pml_teg_send_request_t* req) -{ - req->req_cached = false; - req->req_send.req_base.req_ompi.req_fini = mca_pml_teg_send_request_fini; - 
req->req_send.req_base.req_ompi.req_free = mca_pml_teg_send_request_free; - req->req_send.req_base.req_ompi.req_cancel = mca_pml_teg_send_request_cancel; -} - - -static void mca_pml_teg_send_request_destruct(mca_pml_teg_send_request_t* req) -{ -} - - -OBJ_CLASS_INSTANCE( - mca_pml_teg_send_request_t, - mca_ptl_base_send_request_t, - mca_pml_teg_send_request_construct, - mca_pml_teg_send_request_destruct); - - -/** - * Schedule message delivery across potentially multiple PTLs. - * - * @param request (IN) Request to schedule - * @return status Error status - * - */ - - -int mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req) -{ - ompi_proc_t *proc; - mca_pml_teg_proc_t* proc_pml; - int send_count = 0; - size_t bytes_remaining; - size_t num_ptl_avail; - size_t num_ptl; - - /* - * Only allow one thread in this routine for a given request. - * However, we cannot block callers on a mutex, so simply keep track - * of the number of times the routine has been called and run through - * the scheduling logic once for every call. - */ - if(OPAL_THREAD_ADD32(&req->req_lock,1) == 1) { - proc = ompi_comm_peer_lookup(req->req_send.req_base.req_comm, req->req_send.req_base.req_peer); - proc_pml = (mca_pml_teg_proc_t*) proc->proc_pml; - do { - /* allocate remaining bytes to PTLs */ - bytes_remaining = req->req_send.req_bytes_packed - req->req_offset; - num_ptl_avail = proc_pml->proc_ptl_next.ptl_size; - num_ptl = 0; - while(bytes_remaining > 0 && num_ptl++ < num_ptl_avail) { - mca_ptl_proc_t* ptl_proc = mca_ptl_array_get_next(&proc_pml->proc_ptl_next); - mca_ptl_base_module_t* ptl = ptl_proc->ptl; - int rc; - - /* if this is the last PTL that is available to use, or the number of - * bytes remaining in the message is less than the PTLs minimum fragment - * size, then go ahead and give the rest of the message to this PTL. 
- */ - size_t bytes_to_frag; - if(num_ptl == num_ptl_avail || bytes_remaining < ptl->ptl_min_frag_size) { - bytes_to_frag = bytes_remaining; - - /* otherwise attempt to give the PTL a percentage of the message - * based on a weighting factor. for simplicity calculate this as - * a percentage of the overall message length (regardless of amount - * previously assigned) - */ - } else { - bytes_to_frag = (ptl_proc->ptl_weight * bytes_remaining) / 100; - } - - /* makes sure that we don't exceed ptl_max_frag_size */ - if(ptl->ptl_max_frag_size != 0 && bytes_to_frag > ptl->ptl_max_frag_size) - bytes_to_frag = ptl->ptl_max_frag_size; - - rc = ptl->ptl_put(ptl, ptl_proc->ptl_peer, req, req->req_offset, bytes_to_frag, 0); - if(rc == OMPI_SUCCESS) { - send_count++; - bytes_remaining = req->req_send.req_bytes_packed - req->req_offset; - } - } - - /* unable to complete send - queue for later */ - if(send_count == 0) { - OPAL_THREAD_LOCK(&mca_pml_teg.teg_lock); - opal_list_append(&mca_pml_teg.teg_send_pending, (opal_list_item_t*)req); - OPAL_THREAD_UNLOCK(&mca_pml_teg.teg_lock); - req->req_lock = 0; - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* fragments completed while scheduling - so retry */ - } while(OPAL_THREAD_ADD32(&req->req_lock,-1) > 0); - - /* free the request if completed while in the scheduler */ - if (req->req_send.req_base.req_free_called && req->req_send.req_base.req_pml_complete) { - MCA_PML_TEG_FREE((ompi_request_t**)&req); - } - } - return OMPI_SUCCESS; -} - - -/** - * Update the status of the send request to reflect the number of bytes - * "actually" sent (and acknowledged). This should be called by the - * lower layer PTL after the fragment is actually delivered and has been - * acknowledged (if required). Note that this routine should NOT be called - * directly by the PTL, a function pointer is setup on the PTL at init to - * enable upcalls into the PML w/out directly linking to a specific PML - * implementation. 
- */ - -void mca_pml_teg_send_request_progress( - struct mca_ptl_base_module_t* ptl, - mca_ptl_base_send_request_t* req, - size_t bytes_sent) -{ - bool schedule = false; - - OPAL_THREAD_LOCK(&ompi_request_lock); - req->req_bytes_sent += bytes_sent; - if (req->req_bytes_sent >= req->req_send.req_bytes_packed) { - req->req_send.req_base.req_pml_complete = true; - if (req->req_send.req_base.req_ompi.req_complete == false) { - req->req_send.req_base.req_ompi.req_status.MPI_SOURCE = req->req_send.req_base.req_comm->c_my_rank; - req->req_send.req_base.req_ompi.req_status.MPI_TAG = req->req_send.req_base.req_tag; - req->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; - req->req_send.req_base.req_ompi.req_status._count = req->req_bytes_sent; - req->req_send.req_base.req_ompi.req_complete = true; - ompi_request_completed++; - if(ompi_request_waiting) { - opal_condition_broadcast(&ompi_request_cond); - } - } else if(req->req_send.req_base.req_free_called) { - /* don't free the request if in the scheduler */ - if(req->req_lock == 0) { - MCA_PML_TEG_FREE((ompi_request_t**)&req); - } - } else if (req->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { - mca_pml_base_bsend_request_fini((ompi_request_t*)req); - } - /* test to see if we have scheduled the entire request */ - } else if (req->req_offset < req->req_send.req_bytes_packed) { - schedule = true; - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - - /* schedule remaining fragments of this request */ - if(schedule) { - mca_pml_teg_send_request_schedule(req); - } - - /* check for pending requests that need to be progressed */ - while(opal_list_get_size(&mca_pml_teg.teg_send_pending) != 0) { - OPAL_THREAD_LOCK(&mca_pml_teg.teg_lock); - req = (mca_ptl_base_send_request_t*)opal_list_remove_first(&mca_pml_teg.teg_send_pending); - OPAL_THREAD_UNLOCK(&mca_pml_teg.teg_lock); - if(req == NULL) - break; - if(mca_pml_teg_send_request_schedule(req) != OMPI_SUCCESS) - break; - } -} - diff --git 
a/ompi/mca/pml/teg/pml_teg_sendreq.h b/ompi/mca/pml/teg/pml_teg_sendreq.h deleted file mode 100644 index 41d4a9d914..0000000000 --- a/ompi/mca/pml/teg/pml_teg_sendreq.h +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef OMPI_PML_TEG_SEND_REQUEST_H -#define OMPI_PML_TEG_SEND_REQUEST_H - -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ompi/mca/ptl/base/ptl_base_comm.h" -#include "pml_teg_proc.h" -#include "pml_teg_ptl.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - -typedef mca_ptl_base_send_request_t mca_pml_teg_send_request_t; -OBJ_CLASS_DECLARATION(mca_pml_teg_send_request_t); - - -#define MCA_PML_TEG_SEND_REQUEST_ALLOC( \ - comm, \ - dst, \ - sendreq, \ - rc) \ -{ \ - mca_pml_teg_proc_t *proc = \ - (mca_pml_teg_proc_t*) mca_pml_teg_proc_lookup_remote(comm,dst); \ - mca_ptl_proc_t* ptl_proc; \ - mca_pml_base_ptl_t* ptl_base; \ - \ - if(NULL == proc) { \ - return OMPI_ERR_OUT_OF_RESOURCE; \ - } \ - OPAL_THREAD_SCOPED_LOCK(&proc->base.proc_lock, \ - (ptl_proc = mca_ptl_array_get_next(&proc->proc_ptl_first))); \ - ptl_base = ptl_proc->ptl_base; \ - /* \ - * check to see if there is a cache of send requests associated with \ - * this ptl - if so try the allocation from there. 
\ - */ \ - if(NULL != ptl_base) { \ - OPAL_THREAD_LOCK(&ptl_base->ptl_cache_lock); \ - sendreq = (mca_pml_teg_send_request_t*) \ - opal_list_remove_first(&ptl_base->ptl_cache); \ - if(NULL != sendreq) { \ - OPAL_THREAD_UNLOCK(&ptl_base->ptl_cache_lock); \ - rc = OMPI_SUCCESS; \ - } else if (ptl_base->ptl_cache_alloc < ptl_base->ptl_cache_size) { \ - /* \ - * allocate an additional request to the cache \ - */ \ - mca_ptl_base_module_t* ptl = ptl_base->ptl; \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_WAIT(&mca_pml_teg.teg_send_requests, item, rc); \ - sendreq = (mca_pml_teg_send_request_t*)item; \ - sendreq->req_ptl = ptl; \ - if(ptl->ptl_request_init(ptl, sendreq) == OMPI_SUCCESS) { \ - sendreq->req_cached = true; \ - ptl_base->ptl_cache_alloc++; \ - } \ - OPAL_THREAD_UNLOCK(&ptl_base->ptl_cache_lock); \ - } else { \ - /* \ - * take a request from the global pool \ - */ \ - opal_list_item_t* item; \ - OPAL_THREAD_UNLOCK(&ptl_base->ptl_cache_lock); \ - OMPI_FREE_LIST_WAIT(&mca_pml_teg.teg_send_requests, item, rc); \ - sendreq = (mca_pml_teg_send_request_t*)item; \ - sendreq->req_ptl = ptl_proc->ptl; \ - } \ - \ - /* otherwise - take the allocation from the global list */ \ - } else { \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_WAIT(&mca_pml_teg.teg_send_requests, item, rc); \ - sendreq = (mca_pml_teg_send_request_t*)item; \ - sendreq->req_ptl = ptl_proc->ptl; \ - } \ - /* update request to point to current peer */ \ - sendreq->req_peer = ptl_proc->ptl_peer; \ - sendreq->req_send.req_base.req_proc = proc->base.proc_ompi; \ -} - - -#define MCA_PML_TEG_SEND_REQUEST_INIT( request, \ - addr, \ - count, \ - datatype, \ - peer, \ - tag, \ - comm, \ - mode, \ - persistent) \ -{ \ - MCA_PML_BASE_SEND_REQUEST_INIT((&request->req_send), \ - addr, \ - count, \ - datatype, \ - peer, \ - tag, \ - comm, \ - mode, \ - persistent \ - ); \ -} - - -#define MCA_PML_TEG_SEND_REQUEST_RETURN(sendreq) \ -{ \ - mca_ptl_base_module_t* ptl = (sendreq)->req_ptl; \ - mca_pml_base_ptl_t* 
ptl_base = ptl->ptl_base; \ - \ - /* Let the base handle the reference counts */ \ - MCA_PML_BASE_SEND_REQUEST_FINI((&sendreq->req_send)); \ - \ - /* \ - * If there is a cache associated with the ptl - first attempt \ - * to return the send descriptor to the cache. \ - */ \ - if(NULL != ptl->ptl_base && (sendreq)->req_cached) { \ - OPAL_THREAD_LOCK(&ptl_base->ptl_cache_lock); \ - opal_list_prepend(&ptl_base->ptl_cache, \ - (opal_list_item_t*)sendreq); \ - OPAL_THREAD_UNLOCK(&ptl_base->ptl_cache_lock); \ - } else { \ - OMPI_FREE_LIST_RETURN( \ - &mca_pml_teg.teg_send_requests, (opal_list_item_t*)sendreq); \ - } \ -} - - -/** - * Start a send request. - */ -#define MCA_PML_TEG_SEND_REQUEST_START(req, rc) \ -{ \ - mca_ptl_base_module_t* ptl = req->req_ptl; \ - size_t first_fragment_size = ptl->ptl_first_frag_size; \ - int flags; \ - \ - req->req_lock = 0; \ - req->req_bytes_sent = 0; \ - req->req_peer_match.lval = 0; \ - req->req_peer_addr.lval = 0; \ - req->req_peer_size = 0; \ - req->req_offset = 0; \ - req->req_send.req_base.req_pml_complete = false; \ - req->req_send.req_base.req_ompi.req_complete = false; \ - req->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ - req->req_send.req_base.req_sequence = mca_pml_ptl_comm_send_sequence( \ - req->req_send.req_base.req_comm->c_pml_comm, req->req_send.req_base.req_peer); \ - \ - /* handle buffered send */ \ - if(req->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \ - mca_pml_base_bsend_request_start(&req->req_send.req_base.req_ompi); \ - } \ - \ - /* start the first fragment */ \ - if (first_fragment_size == 0 || \ - req->req_send.req_bytes_packed <= first_fragment_size) { \ - first_fragment_size = req->req_send.req_bytes_packed; \ - flags = (req->req_send.req_send_mode == MCA_PML_BASE_SEND_SYNCHRONOUS) ? 
\ - MCA_PTL_FLAGS_ACK : 0; \ - } else { \ - /* require match for first fragment of a multi-fragment */ \ - flags = MCA_PTL_FLAGS_ACK; \ - } \ - rc = ptl->ptl_send(ptl, req->req_peer, req, 0, first_fragment_size, \ - flags); \ -} - - -/** - * Schedule any data that was not delivered in the first fragment - * across the available PTLs. - */ -int mca_pml_teg_send_request_schedule(mca_ptl_base_send_request_t* req); - - -/** - * Update the request to reflect the number of bytes delivered. If this - * was the first fragment - schedule the rest of the data. - */ -void mca_pml_teg_send_request_progress( - struct mca_ptl_base_module_t* ptl, - mca_ptl_base_send_request_t* send_request, - size_t bytes_sent -); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/pml/teg/pml_teg_start.c b/ompi/mca/pml/teg/pml_teg_start.c deleted file mode 100644 index 70eb908bdf..0000000000 --- a/ompi/mca/pml/teg/pml_teg_start.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_teg.h" -#include "pml_teg_recvreq.h" -#include "pml_teg_sendreq.h" - - -int mca_pml_teg_start(size_t count, ompi_request_t** requests) -{ - int rc; - size_t i; - for(i=0; ireq_ompi.req_state) { - case OMPI_REQUEST_INACTIVE: - if(pml_request->req_pml_complete == true) - break; - /* otherwise fall through */ - case OMPI_REQUEST_ACTIVE: { - - ompi_request_t *request; - OPAL_THREAD_LOCK(&ompi_request_lock); - if (pml_request->req_pml_complete == false) { - /* free request after it completes */ - pml_request->req_free_called = true; - } else { - /* can reuse the existing request */ - OPAL_THREAD_UNLOCK(&ompi_request_lock); - break; - } - - /* allocate a new request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: { - mca_pml_base_send_mode_t sendmode = - ((mca_pml_base_send_request_t*)pml_request)->req_send_mode; - rc = mca_pml_teg_isend_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - sendmode, - pml_request->req_comm, - &request); - break; - } - case MCA_PML_REQUEST_RECV: - rc = mca_pml_teg_irecv_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - pml_request->req_comm, - &request); - break; - default: - rc = OMPI_ERR_REQUEST; - break; - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - if(OMPI_SUCCESS != rc) - return rc; - pml_request = (mca_pml_base_request_t*)request; - requests[i] = request; - break; - } - default: - return OMPI_ERR_REQUEST; - } - - /* start the request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: - { - mca_ptl_base_send_request_t* sendreq = (mca_ptl_base_send_request_t*)pml_request; - MCA_PML_TEG_SEND_REQUEST_START(sendreq, rc); - if(rc != OMPI_SUCCESS) - return rc; - break; - } - case MCA_PML_REQUEST_RECV: - { - 
mca_ptl_base_recv_request_t* recvreq = (mca_ptl_base_recv_request_t*)pml_request; - if((rc = mca_pml_teg_recv_request_start(recvreq)) != OMPI_SUCCESS) - return rc; - break; - } - default: - return OMPI_ERR_REQUEST; - } - } - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/teg/post_configure.sh b/ompi/mca/pml/teg/post_configure.sh deleted file mode 100644 index 5e6a97e999..0000000000 --- a/ompi/mca/pml/teg/post_configure.sh +++ /dev/null @@ -1 +0,0 @@ -DIRECT_CALL_HEADER="mca/pml/teg/pml_teg.h" diff --git a/ompi/mca/pml/uniq/Makefile.am b/ompi/mca/pml/uniq/Makefile.am deleted file mode 100644 index f8926fad33..0000000000 --- a/ompi/mca/pml/uniq/Makefile.am +++ /dev/null @@ -1,54 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if OMPI_BUILD_pml_uniq_DSO -component_noinst = -component_install = mca_pml_uniq.la -else -component_noinst = libmca_pml_uniq.la -component_install = -endif - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) -mca_pml_uniq_la_SOURCES = $(pml_uniq_la_sources) -mca_pml_uniq_la_LIBADD = \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la -mca_pml_uniq_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pml_uniq_la_SOURCES = $(pml_uniq_la_sources) -libmca_pml_uniq_la_LIBADD = -libmca_pml_uniq_la_LDFLAGS = -module -avoid-version - -pml_uniq_la_sources = pml_uniq.c pml_uniq.h pml_uniq_cancel.c pml_uniq_component.c \ - pml_uniq_component.h pml_uniq_iprobe.c pml_uniq_irecv.c pml_uniq_isend.c \ - pml_uniq_ptl.c pml_uniq_ptl.h pml_uniq_proc.c pml_uniq_proc.h pml_uniq_progress.c \ - pml_uniq_recvfrag.c pml_uniq_recvfrag.h pml_uniq_recvreq.c pml_uniq_recvreq.h \ - pml_uniq_sendreq.c pml_uniq_sendreq.h pml_uniq_start.c - diff --git a/ompi/mca/pml/uniq/configure.params b/ompi/mca/pml/uniq/configure.params deleted file mode 100644 index a433235ee7..0000000000 --- a/ompi/mca/pml/uniq/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=pml_uniq.c -PARAM_CONFIG_HEADER_FILE="uniq_config.h" -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/pml/uniq/pml_uniq.c b/ompi/mca/pml/uniq/pml_uniq.c deleted file mode 100644 index e31b9e6635..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq.c +++ /dev/null @@ -1,386 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include - -#include "ompi/class/ompi_bitmap.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/base.h" -#include "ompi/mca/ptl/base/ptl_base_comm.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "pml_uniq.h" -#include "pml_uniq_component.h" -#include "pml_uniq_proc.h" -#include "pml_uniq_ptl.h" -#include "pml_uniq_recvreq.h" -#include "pml_uniq_sendreq.h" -#include "pml_uniq_recvfrag.h" - - -mca_pml_uniq_t mca_pml_uniq = { - { - mca_pml_uniq_add_procs, - mca_pml_uniq_del_procs, - mca_pml_uniq_enable, - mca_pml_uniq_progress, - mca_pml_uniq_add_comm, - mca_pml_uniq_del_comm, - mca_pml_uniq_irecv_init, - mca_pml_uniq_irecv, - mca_pml_uniq_recv, - mca_pml_uniq_isend_init, - mca_pml_uniq_isend, - mca_pml_uniq_send, - mca_pml_uniq_iprobe, - mca_pml_uniq_probe, - 
mca_pml_uniq_start, - 32768, - (0x7fffffff) /* XXX should be INT_MAX, as in ob1 */ - } -}; - - -int mca_pml_uniq_add_comm(ompi_communicator_t* comm) -{ - /* allocate pml specific comm data */ - mca_pml_ptl_comm_t* pml_comm = OBJ_NEW(mca_pml_ptl_comm_t); - if (NULL == pml_comm) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - mca_pml_ptl_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count); - comm->c_pml_comm = pml_comm; - return OMPI_SUCCESS; -} - -int mca_pml_uniq_del_comm(ompi_communicator_t* comm) -{ - OBJ_RELEASE(comm->c_pml_comm); - comm->c_pml_comm = NULL; /* make sure it's set to NULL */ - return OMPI_SUCCESS; -} - -static int ptl_exclusivity_compare(const void* arg1, const void* arg2) -{ - mca_ptl_base_module_t* ptl1 = *(struct mca_ptl_base_module_t**)arg1; - mca_ptl_base_module_t* ptl2 = *(struct mca_ptl_base_module_t**)arg2; - if( ptl1->ptl_exclusivity > ptl2->ptl_exclusivity ) { - return -1; - } else if (ptl1->ptl_exclusivity == ptl2->ptl_exclusivity ) { - return 0; - } else { - return 1; - } -} - - -static int mca_pml_uniq_add_ptls(void) -{ - /* build an array of ptls and ptl modules */ - mca_ptl_base_selected_module_t* selected_ptl; - size_t num_ptls = opal_list_get_size(&mca_ptl_base_modules_initialized); - size_t cache_bytes = 0; - - mca_pml_uniq.uniq_num_ptl_modules = 0; - mca_pml_uniq.uniq_num_ptl_progress = 0; - mca_pml_uniq.uniq_num_ptl_components = 0; - mca_pml_uniq.uniq_ptl_modules = (mca_ptl_base_module_t **)malloc(sizeof(mca_ptl_base_module_t*) * num_ptls); - mca_pml_uniq.uniq_ptl_progress = (mca_ptl_base_component_progress_fn_t*)malloc(sizeof(mca_ptl_base_component_progress_fn_t) * num_ptls); - mca_pml_uniq.uniq_ptl_components = (mca_ptl_base_component_t **)malloc(sizeof(mca_ptl_base_component_t*) * num_ptls); - if (NULL == mca_pml_uniq.uniq_ptl_modules || - NULL == mca_pml_uniq.uniq_ptl_progress || - NULL == mca_pml_uniq.uniq_ptl_components) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for(selected_ptl = 
(mca_ptl_base_selected_module_t*) - opal_list_get_first(&mca_ptl_base_modules_initialized); - selected_ptl != (mca_ptl_base_selected_module_t*) - opal_list_get_end(&mca_ptl_base_modules_initialized); - selected_ptl = (mca_ptl_base_selected_module_t*)opal_list_get_next(selected_ptl)) { - mca_ptl_base_module_t *ptl = selected_ptl->pbsm_module; - size_t i; - - mca_pml_uniq.uniq_ptl_modules[mca_pml_uniq.uniq_num_ptl_modules++] = ptl; - for(i=0; i < mca_pml_uniq.uniq_num_ptl_components; i++) { - if(mca_pml_uniq.uniq_ptl_components[i] == ptl->ptl_component) { - break; - } - } - if(i == mca_pml_uniq.uniq_num_ptl_components) { - mca_pml_uniq.uniq_ptl_components[mca_pml_uniq.uniq_num_ptl_components++] = ptl->ptl_component; - } - - /* - *setup ptl - */ - - /* set pointer to fragment matching logic routine, if this - * not already set by the ptl - */ - if(NULL == ptl->ptl_match) { - ptl->ptl_match = mca_pml_uniq_recv_frag_match; - } - ptl->ptl_send_progress = mca_pml_uniq_send_request_progress; - ptl->ptl_recv_progress = mca_pml_uniq_recv_request_progress; - ptl->ptl_stack = ptl; - ptl->ptl_base = NULL; - - /* find maximum required size for cache */ - if(ptl->ptl_cache_bytes > cache_bytes) { - cache_bytes = ptl->ptl_cache_bytes; - } - } - - /* setup send fragments based on largest required send request */ - ompi_free_list_init( &mca_pml_uniq.uniq_send_requests, - sizeof(mca_pml_uniq_send_request_t) + cache_bytes, - OBJ_CLASS(mca_pml_uniq_send_request_t), - mca_pml_uniq.uniq_free_list_num, - mca_pml_uniq.uniq_free_list_max, - mca_pml_uniq.uniq_free_list_inc, - NULL ); - - /* sort ptl list by exclusivity */ - qsort(mca_pml_uniq.uniq_ptl_modules, mca_pml_uniq.uniq_num_ptl_modules, sizeof(struct mca_ptl_t*), ptl_exclusivity_compare); - return OMPI_SUCCESS; -} - -/* - * Called by the base PML in order to notify the PMLs about their selected status. 
After the init pass, - * the base module will choose one PML (depending on informations provided by the init function) and then - * it will call the pml_enable function with true (for the selected one) and with false for all the - * others. The selected one can then pass control information through to all PTL modules. - */ - -int mca_pml_uniq_enable(bool enable) -{ - size_t i; - int value = enable; - - /* If I'm not selected then prepare for close */ - if( false == enable ) return OMPI_SUCCESS; - - /* recv requests */ - ompi_free_list_init( &mca_pml_uniq.uniq_recv_requests, - sizeof(mca_pml_uniq_recv_request_t), - OBJ_CLASS(mca_pml_uniq_recv_request_t), - mca_pml_uniq.uniq_free_list_num, - mca_pml_uniq.uniq_free_list_max, - mca_pml_uniq.uniq_free_list_inc, - NULL ); - - /* Grab all the PTLs and prepare them */ - mca_pml_uniq_add_ptls(); - - /* and now notify them about the status */ - for(i=0; i < mca_pml_uniq.uniq_num_ptl_components; i++) { - if(NULL != mca_pml_uniq.uniq_ptl_components[i]->ptlm_control) { - int rc = mca_pml_uniq.uniq_ptl_components[i]->ptlm_control(MCA_PTL_ENABLE,&value,sizeof(value)); - if(rc != OMPI_SUCCESS) - return rc; - } - } - return OMPI_SUCCESS; -} - -/* - * For each proc setup a datastructure that indicates the PTLs - * that can be used to reach the destination. 
- * - */ - -int mca_pml_uniq_add_procs(ompi_proc_t** procs, size_t nprocs) -{ - size_t p; - ompi_bitmap_t reachable; - struct mca_ptl_base_peer_t** ptl_peers = NULL; - int rc; - size_t p_index; - - if(nprocs == 0) - return OMPI_SUCCESS; - - OBJ_CONSTRUCT(&reachable, ompi_bitmap_t); - rc = ompi_bitmap_init(&reachable, nprocs); - if(OMPI_SUCCESS != rc) - return rc; - - /* attempt to add all procs to each ptl */ - ptl_peers = (struct mca_ptl_base_peer_t **)malloc(nprocs * sizeof(struct mca_ptl_base_peer_t*)); - for(p_index = 0; p_index < mca_pml_uniq.uniq_num_ptl_modules; p_index++) { - mca_ptl_base_module_t* ptl = mca_pml_uniq.uniq_ptl_modules[p_index]; - int ptl_inuse = 0; - - /* if the ptl can reach the destination proc it sets the - * corresponding bit (proc index) in the reachable bitmap - * and can return addressing information for each proc - * that is passed back to the ptl on data transfer calls - */ - ompi_bitmap_clear_all_bits(&reachable); - memset(ptl_peers, 0, nprocs * sizeof(struct mca_ptl_base_peer_t*)); - rc = ptl->ptl_add_procs(ptl, nprocs, procs, ptl_peers, &reachable); - if(OMPI_SUCCESS != rc) { - free(ptl_peers); - return rc; - } - - /* for each proc that is reachable - add the ptl to the procs array(s) */ - for(p=0; p < nprocs; p++) { - ompi_proc_t *proc; - mca_pml_uniq_proc_t* proc_pml; - - if( !ompi_bitmap_is_set_bit(&reachable, p) ) continue; - - proc = procs[p]; - proc_pml = (mca_pml_uniq_proc_t*) proc->proc_pml; - - /* this ptl can be used */ - ptl_inuse++; - - /* initialize each proc */ - if(NULL == proc_pml) { - - /* allocate pml specific proc data */ - proc_pml = OBJ_NEW(mca_pml_uniq_proc_t); - if (NULL == proc_pml) { - opal_output(0, "mca_pml_uniq_add_procs: unable to allocate resources"); - free(ptl_peers); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - proc_pml->base.proc_ompi = proc; - proc->proc_pml = (mca_pml_proc_t*) proc_pml; - /* it's the first PTL so add it to both first and next */ - proc_pml->proc_ptl_flags |= ptl->ptl_flags; - if 
(NULL == ptl->ptl_base && - ptl->ptl_cache_bytes > 0 && - NULL != ptl->ptl_request_init && - NULL != ptl->ptl_request_fini) { - - mca_pml_uniq_ptl_t* ptl_base = OBJ_NEW(mca_pml_uniq_ptl_t); - ptl_base->ptl = ptl; - ptl_base->ptl_cache_size = ptl->ptl_cache_size; - ptl->ptl_base = (struct mca_pml_base_ptl_t*)ptl_base; - } - proc_pml->proc_ptl_first.ptl_base = (mca_pml_base_ptl_t*)ptl->ptl_base; - proc_pml->proc_ptl_first.ptl_peer = ptl_peers[p]; - proc_pml->proc_ptl_first.ptl = ptl; -#if PML_UNIQ_ACCEPT_NEXT_PTL - proc_pml->proc_ptl_next.ptl_base = (mca_pml_base_ptl_t*)ptl->ptl_base; - proc_pml->proc_ptl_next.ptl_peer = ptl_peers[p]; - proc_pml->proc_ptl_next.ptl = ptl; -#endif /* PML_UNIQ_ACCEPT_NEXT_PTL */ - } else { - /* choose the best for first and next. For the first look at the latency when - * for the next at the maximum bandwidth. - */ - opal_output( 0, "Not yet done dude !!!" ); -#if PML_UNIQ_ACCEPT_NEXT_PTL -#endif /* PML_UNIQ_ACCEPT_NEXT_PTL */ - } - - /* dont allow an additional PTL with a lower exclusivity ranking */ - if( NULL != proc_pml->proc_ptl_first.ptl ) { - /* skip this ptl if the exclusivity is less than the previous */ - if( proc_pml->proc_ptl_first.ptl->ptl_exclusivity > ptl->ptl_exclusivity ) { - if(ptl_peers[p] != NULL) { - ptl->ptl_del_procs(ptl, 1, &proc, &ptl_peers[p]); - } - continue; - } - } - proc_pml->proc_ptl_flags |= ptl->ptl_flags; - } - - if(ptl_inuse > 0 && NULL != ptl->ptl_component->ptlm_progress) { - size_t p; - bool found = false; - for(p=0; p < mca_pml_uniq.uniq_num_ptl_progress; p++) { - if(mca_pml_uniq.uniq_ptl_progress[p] == ptl->ptl_component->ptlm_progress) { - found = true; - break; - } - } - if(found == false) { - mca_pml_uniq.uniq_ptl_progress[mca_pml_uniq.uniq_num_ptl_progress] = - ptl->ptl_component->ptlm_progress; - mca_pml_uniq.uniq_num_ptl_progress++; - } - } - } - free(ptl_peers); - - return OMPI_SUCCESS; -} - -/* - * iterate through each proc and notify any PTLs associated - * with the proc that it is/has 
gone away - */ - -int mca_pml_uniq_del_procs(ompi_proc_t** procs, size_t nprocs) -{ - size_t p; - int rc; - for(p = 0; p < nprocs; p++) { - ompi_proc_t *proc = procs[p]; - mca_pml_uniq_proc_t* proc_pml = (mca_pml_uniq_proc_t*) proc->proc_pml; - mca_ptl_proc_t* ptl_proc; - mca_ptl_base_module_t* ptl; - - /* If the PTL used for the first fragment and the one use for the others is not - * the same then we have to remove the processor from both of them. - */ - - ptl_proc = &(proc_pml->proc_ptl_first); - ptl = ptl_proc->ptl; - rc = ptl->ptl_del_procs( ptl, 1, &proc, &ptl_proc->ptl_peer ); - if( OMPI_SUCCESS != rc ) { - return rc; - } -#if PML_UNIQ_ACCEPT_NEXT_PTL - if( proc_pml->proc_ptl_first.ptl != proc_pml->proc_ptl_next.ptl ) { - ptl_proc = &(proc_pml->proc_ptl_next); - ptl = ptl_proc->ptl; - rc = ptl->ptl_del_procs( ptl, 1, &proc, &ptl_proc->ptl_peer ); - if( OMPI_SUCCESS != rc ) { - return rc; - } - } -#endif /* PML_UNIQ_ACCEPT_NEXT_PTL */ - - /* do any required cleanup */ - OBJ_RELEASE(proc_pml); - proc->proc_pml = NULL; - } - return OMPI_SUCCESS; -} - -int mca_pml_uniq_component_fini(void) -{ - /* FIX */ - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/uniq/pml_uniq.h b/ompi/mca/pml/uniq/pml_uniq.h deleted file mode 100644 index e633c24655..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_UNIQ_H -#define MCA_PML_UNIQ_H - -#include "opal/threads/threads.h" -#include "opal/threads/condition.h" -#include "ompi/class/ompi_free_list.h" -#include "opal/util/cmd_line.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_request.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/mca/ptl/ptl.h" - - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -/** - * UNIQ PML module - */ - -struct mca_pml_uniq_t { - mca_pml_base_module_t super; - - mca_ptl_base_component_t **uniq_ptl_components; - size_t uniq_num_ptl_components; - - mca_ptl_base_module_t** uniq_ptl_modules; - size_t uniq_num_ptl_modules; - - mca_ptl_base_component_progress_fn_t* uniq_ptl_progress; - size_t uniq_num_ptl_progress; - - opal_list_t uniq_procs; - opal_mutex_t uniq_lock; - - int uniq_free_list_num; /* initial size of free list */ - int uniq_free_list_max; /* maximum size of free list */ - int uniq_free_list_inc; /* number of elements to grow free list */ - int uniq_poll_iterations; /* number of iterations to poll for completion */ - int uniq_priority; /* the PML priority */ - - /* free list of requests */ - ompi_free_list_t uniq_send_requests; - ompi_free_list_t uniq_recv_requests; - - /* list of pending send requests */ - opal_list_t uniq_send_pending; -}; -typedef struct mca_pml_uniq_t mca_pml_uniq_t; - -extern mca_pml_uniq_t mca_pml_uniq; - - -/* - * PML module functions. - */ - - -extern int mca_pml_uniq_component_open(void); -extern int mca_pml_uniq_component_close(void); - -extern mca_pml_base_module_t* mca_pml_uniq_component_init( - int *priority, - bool enable_progress_threads, - bool enable_mpi_threads -); - -extern int mca_pml_uniq_component_fini(void); - - - -/* - * PML interface functions. 
- */ - -extern int mca_pml_uniq_add_comm( - struct ompi_communicator_t* comm -); - -extern int mca_pml_uniq_del_comm( - struct ompi_communicator_t* comm -); - -extern int mca_pml_uniq_add_procs( - struct ompi_proc_t **procs, - size_t nprocs -); - -extern int mca_pml_uniq_del_procs( - struct ompi_proc_t **procs, - size_t nprocs -); - -extern int mca_pml_uniq_enable( - bool enable -); - -extern int mca_pml_uniq_progress(void); - -extern int mca_pml_uniq_iprobe( - int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - ompi_status_public_t* status -); - -extern int mca_pml_uniq_probe( - int dst, - int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status -); - -extern int mca_pml_uniq_cancelled( - ompi_request_t* request, - int *flag -); - - -extern int mca_pml_uniq_isend_init( - void *buf, - size_t count, - struct ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request -); - -extern int mca_pml_uniq_isend( - void *buf, - size_t count, - struct ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request -); - -extern int mca_pml_uniq_send( - void *buf, - size_t count, - struct ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm -); - -extern int mca_pml_uniq_irecv_init( - void *buf, - size_t count, - struct ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request -); - -extern int mca_pml_uniq_irecv( - void *buf, - size_t count, - struct ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request -); - -extern int mca_pml_uniq_recv( - void *buf, - size_t count, - struct ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - 
ompi_status_public_t* status -); - -extern int mca_pml_uniq_progress(void); - -extern int mca_pml_uniq_start( - size_t count, - ompi_request_t** requests -); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#define MCA_PML_UNIQ_FREE(request) \ -{ \ - mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)(request); \ - pml_request->req_free_called = true; \ - if( pml_request->req_pml_complete == true) \ - { \ - switch(pml_request->req_type) { \ - case MCA_PML_REQUEST_SEND: \ - { \ - mca_ptl_base_send_request_t* sendreq = (mca_ptl_base_send_request_t*)pml_request; \ - while(sendreq->req_lock > 0); \ - if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \ - mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); \ - } \ - MCA_PML_UNIQ_SEND_REQUEST_RETURN(sendreq); \ - break; \ - } \ - case MCA_PML_REQUEST_RECV: \ - { \ - mca_ptl_base_recv_request_t* recvreq = (mca_ptl_base_recv_request_t*)pml_request; \ - MCA_PML_UNIQ_RECV_REQUEST_RETURN(recvreq); \ - break; \ - } \ - default: \ - break; \ - } \ - } \ - *(request) = MPI_REQUEST_NULL; \ -} - -#define MCA_PML_UNIQ_FINI(request) \ -{ \ - mca_pml_base_request_t* pml_request = *(mca_pml_base_request_t**)(request); \ - if( (pml_request->req_persistent) && !(pml_request->req_free_called) ) { \ - pml_request->req_ompi.req_state = OMPI_REQUEST_INACTIVE; \ - } else { \ - MCA_PML_UNIQ_FREE(request); \ - } \ -} - -#endif - diff --git a/ompi/mca/pml/uniq/pml_uniq_cancel.c b/ompi/mca/pml/uniq/pml_uniq_cancel.c deleted file mode 100644 index 40cde2c88b..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_cancel.c +++ /dev/null @@ -1,30 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_uniq.h" - -int mca_pml_uniq_cancelled(ompi_request_t* request, int* flag) -{ - if(NULL != flag) - *flag = (true == request->req_status._cancelled ? 1 : 0); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/uniq/pml_uniq_component.c b/ompi/mca/pml/uniq/pml_uniq_component.c deleted file mode 100644 index 2cb4017ac1..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_component.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/event/event.h" -#include "mpi.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/base.h" -#include "opal/mca/base/mca_base_param.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "pml_uniq.h" -#include "pml_uniq_proc.h" -#include "pml_uniq_sendreq.h" -#include "pml_uniq_recvreq.h" - - -mca_pml_base_component_1_0_0_t mca_pml_uniq_component = { - - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - { - /* Indicate that we are a pml v1.0.0 component (which also implies - a specific MCA version) */ - - MCA_PML_BASE_VERSION_1_0_0, - - "uniq", /* MCA component name */ - OMPI_MAJOR_VERSION, /* MCA component major version */ - OMPI_MINOR_VERSION, /* MCA component minor version */ - OMPI_RELEASE_VERSION, /* MCA component release version */ - mca_pml_uniq_component_open, /* component open */ - mca_pml_uniq_component_close /* component close */ - }, - - /* Next the MCA v1.0.0 component meta data */ - - { - /* Whether the component is checkpointable or not */ - false - }, - - mca_pml_uniq_component_init, /* component init */ - mca_pml_uniq_component_fini /* component finalize */ -}; - - - -static inline int mca_pml_uniq_param_register_int( - const char* param_name, - int default_value) -{ - int id = mca_base_param_register_int("pml","uniq",param_name,NULL,default_value); - int param_value = default_value; - mca_base_param_lookup_int(id,¶m_value); - return param_value; -} - - -int mca_pml_uniq_component_open(void) -{ - OBJ_CONSTRUCT(&mca_pml_uniq.uniq_lock, opal_mutex_t); - OBJ_CONSTRUCT(&mca_pml_uniq.uniq_send_requests, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_pml_uniq.uniq_recv_requests, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_pml_uniq.uniq_procs, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_uniq.uniq_send_pending, opal_list_t); - - 
mca_pml_uniq.uniq_ptl_components = NULL; - mca_pml_uniq.uniq_num_ptl_components = 0; - mca_pml_uniq.uniq_ptl_modules = NULL; - mca_pml_uniq.uniq_num_ptl_modules = 0; - mca_pml_uniq.uniq_ptl_progress = NULL; - mca_pml_uniq.uniq_num_ptl_progress = 0; - - mca_pml_uniq.uniq_free_list_num = - mca_pml_uniq_param_register_int("free_list_num", 256); - mca_pml_uniq.uniq_free_list_max = - mca_pml_uniq_param_register_int("free_list_max", -1); - mca_pml_uniq.uniq_free_list_inc = - mca_pml_uniq_param_register_int("free_list_inc", 256); - mca_pml_uniq.uniq_poll_iterations = - mca_pml_uniq_param_register_int("poll_iterations", 100000); - - mca_pml_uniq.uniq_priority = - mca_pml_uniq_param_register_int("priority", 0); - return mca_ptl_base_open(); -} - - -int mca_pml_uniq_component_close(void) -{ - int rc; - - if( NULL == mca_pml_uniq.uniq_ptl_components ) /* I was not enabled */ - return OMPI_SUCCESS; - - if( OMPI_SUCCESS != (rc = mca_ptl_base_close()) ) - return rc; - - if(NULL != mca_pml_uniq.uniq_ptl_components) { - free(mca_pml_uniq.uniq_ptl_components); - mca_pml_uniq.uniq_ptl_components = NULL; - } - mca_pml_uniq.uniq_num_ptl_components = 0; - if(NULL != mca_pml_uniq.uniq_ptl_modules) { - free(mca_pml_uniq.uniq_ptl_modules); - mca_pml_uniq.uniq_ptl_modules = NULL; - } - mca_pml_uniq.uniq_num_ptl_modules = 0; - if(NULL != mca_pml_uniq.uniq_ptl_progress) { - free(mca_pml_uniq.uniq_ptl_progress); - mca_pml_uniq.uniq_ptl_progress = NULL; - } - OBJ_DESTRUCT(&mca_pml_uniq.uniq_send_pending); - OBJ_DESTRUCT(&mca_pml_uniq.uniq_send_requests); - OBJ_DESTRUCT(&mca_pml_uniq.uniq_recv_requests); - OBJ_DESTRUCT(&mca_pml_uniq.uniq_procs); - OBJ_DESTRUCT(&mca_pml_uniq.uniq_lock); - return OMPI_SUCCESS; -} - - -mca_pml_base_module_t* mca_pml_uniq_component_init(int* priority, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - int rc; - *priority = mca_pml_uniq.uniq_priority; - - /* buffered send */ - if( OMPI_SUCCESS != mca_pml_base_bsend_init(enable_mpi_threads) ) { - 
opal_output(0, "mca_pml_uniq_component_init: mca_pml_bsend_init failed\n"); - return NULL; - } - - rc = mca_ptl_base_select( enable_progress_threads, enable_mpi_threads ); - if( rc != OMPI_SUCCESS ) - return NULL; - - return &mca_pml_uniq.super; -} - diff --git a/ompi/mca/pml/uniq/pml_uniq_component.h b/ompi/mca/pml/uniq/pml_uniq_component.h deleted file mode 100644 index fd3c6269c0..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_component.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_UNIQ_COMPONENT_H -#define MCA_PML_UNIQ_COMPONENT_H - -/* - * PML module functions. - */ - -OMPI_COMP_EXPORT extern mca_pml_base_component_1_0_0_t mca_pml_uniq_component; - -#endif diff --git a/ompi/mca/pml/uniq/pml_uniq_iprobe.c b/ompi/mca/pml/uniq/pml_uniq_iprobe.c deleted file mode 100644 index 3310eab0fb..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_iprobe.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "pml_uniq_recvreq.h" - - -int mca_pml_uniq_iprobe(int src, - int tag, - struct ompi_communicator_t *comm, - int *matched, ompi_status_public_t * status) -{ - int rc; - mca_ptl_base_recv_request_t recvreq; - - OBJ_CONSTRUCT( &(recvreq), mca_ptl_base_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE; - MCA_PML_UNIQ_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char, src, tag, comm, true); - - *matched = 0; - if ((rc = mca_pml_uniq_recv_request_start(&recvreq)) == OMPI_SUCCESS) { - if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) { - if( NULL != status ) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - *matched = 1; - } else { - /* we are supposed to progress ... */ - opal_progress(); - } - } - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return rc; -} - - -int mca_pml_uniq_probe(int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc; - mca_ptl_base_recv_request_t recvreq; - - OBJ_CONSTRUCT( &(recvreq), mca_ptl_base_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE; - MCA_PML_UNIQ_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char, src, tag, comm, true); - - if ((rc = mca_pml_uniq_recv_request_start(&recvreq)) != OMPI_SUCCESS) { - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return rc; - } - - if (recvreq.req_recv.req_base.req_ompi.req_complete == false) { - /* give up and sleep until completion */ - if (opal_using_threads()) { - opal_mutex_lock(&ompi_request_lock); - ompi_request_waiting++; - while (recvreq.req_recv.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - 
ompi_request_waiting--; - opal_mutex_unlock(&ompi_request_lock); - } else { - ompi_request_waiting++; - while (recvreq.req_recv.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - } - } - - if (NULL != status) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/uniq/pml_uniq_irecv.c b/ompi/mca/pml/uniq/pml_uniq_irecv.c deleted file mode 100644 index d09d616126..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_irecv.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "pml_uniq_recvreq.h" - - -int mca_pml_uniq_irecv_init(void *addr, - size_t count, - struct ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - int rc; - mca_ptl_base_recv_request_t *recvreq; - MCA_PML_UNIQ_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_UNIQ_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, true); - - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - -int mca_pml_uniq_irecv(void *addr, - size_t count, - struct ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - int rc; - - mca_ptl_base_recv_request_t *recvreq; - MCA_PML_UNIQ_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_UNIQ_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - if ((rc = mca_pml_uniq_recv_request_start(recvreq)) != OMPI_SUCCESS) { - MCA_PML_UNIQ_RECV_REQUEST_RETURN(recvreq); - return rc; - } - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - - -int mca_pml_uniq_recv(void *addr, - size_t count, - struct ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc; - mca_ptl_base_recv_request_t *recvreq; - MCA_PML_UNIQ_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_UNIQ_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - if ((rc = mca_pml_uniq_recv_request_start(recvreq)) != OMPI_SUCCESS) { - goto recv_finish; - } - - if (recvreq->req_recv.req_base.req_ompi.req_complete == false) { - /* give up and sleep until completion */ - if (opal_using_threads()) { - opal_mutex_lock(&ompi_request_lock); - 
ompi_request_waiting++; - while (recvreq->req_recv.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - opal_mutex_unlock(&ompi_request_lock); - } else { - ompi_request_waiting++; - while (recvreq->req_recv.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - } - } - recv_finish: - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - - MCA_PML_UNIQ_RECV_REQUEST_RETURN(recvreq); - return recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; -} diff --git a/ompi/mca/pml/uniq/pml_uniq_isend.c b/ompi/mca/pml/uniq/pml_uniq_isend.c deleted file mode 100644 index 5b101530b9..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_isend.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_uniq.h" -#include "pml_uniq_proc.h" -#include "pml_uniq_sendreq.h" -#include "pml_uniq_recvreq.h" - - -int mca_pml_uniq_isend_init(void *buf, - size_t count, - struct ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - int rc; - - mca_ptl_base_send_request_t *sendreq; - MCA_PML_UNIQ_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - - MCA_PML_UNIQ_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, true); - - *request = (ompi_request_t *) sendreq; - return OMPI_SUCCESS; -} - - -int mca_pml_uniq_isend(void *buf, - size_t count, - struct ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - int rc; - mca_ptl_base_send_request_t *sendreq; - MCA_PML_UNIQ_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - MCA_PML_UNIQ_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); - - MCA_PML_UNIQ_SEND_REQUEST_START(sendreq, rc); - *request = (ompi_request_t *) sendreq; - return rc; -} - - -int mca_pml_uniq_send(void *buf, - size_t count, - struct ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm) -{ - int rc; - mca_ptl_base_send_request_t *sendreq; - MCA_PML_UNIQ_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - - MCA_PML_UNIQ_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); - - MCA_PML_UNIQ_SEND_REQUEST_START(sendreq, rc); - if (rc != OMPI_SUCCESS) { - MCA_PML_UNIQ_FREE((ompi_request_t **) & sendreq); - return rc; - } - - if (sendreq->req_send.req_base.req_ompi.req_complete == false) 
{ - /* give up and sleep until completion */ - if (opal_using_threads()) { - opal_mutex_lock(&ompi_request_lock); - ompi_request_waiting++; - while (sendreq->req_send.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - opal_mutex_unlock(&ompi_request_lock); - } else { - ompi_request_waiting++; - while (sendreq->req_send.req_base.req_ompi.req_complete == false) - opal_condition_wait(&ompi_request_cond, &ompi_request_lock); - ompi_request_waiting--; - } - } - - /* return request to pool */ - MCA_PML_UNIQ_FREE((ompi_request_t **) & sendreq); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/uniq/pml_uniq_proc.c b/ompi/mca/pml/uniq/pml_uniq_proc.c deleted file mode 100644 index c21ffba0a4..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_proc.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/sys/atomic.h" -#include "pml_uniq.h" -#include "pml_uniq_proc.h" - - -static void mca_pml_uniq_proc_construct(mca_pml_uniq_proc_t* proc) -{ - proc->base.proc_ompi = NULL; - proc->proc_ptl_flags = 0; - OBJ_CONSTRUCT(&proc->base.proc_lock, opal_mutex_t); - - proc->proc_ptl_first.ptl_peer = NULL; - proc->proc_ptl_first.ptl_base = NULL; - proc->proc_ptl_first.ptl = NULL; -#if PML_UNIQ_ACCEPT_NEXT_PTL - proc->proc_ptl_next.ptl_peer = NULL; - proc->proc_ptl_next.ptl_base = NULL; - proc->proc_ptl_next.ptl = NULL; -#endif /* PML_UNIQ_ACCEPT_NEXT_PTL */ - OPAL_THREAD_LOCK(&mca_pml_uniq.uniq_lock); - opal_list_append(&mca_pml_uniq.uniq_procs, (opal_list_item_t*)proc); - OPAL_THREAD_UNLOCK(&mca_pml_uniq.uniq_lock); -} - - -static void mca_pml_uniq_proc_destruct(mca_pml_uniq_proc_t* proc) -{ - OPAL_THREAD_LOCK(&mca_pml_uniq.uniq_lock); - opal_list_remove_item(&mca_pml_uniq.uniq_procs, (opal_list_item_t*)proc); - OPAL_THREAD_UNLOCK(&mca_pml_uniq.uniq_lock); - - OBJ_DESTRUCT(&proc->base.proc_lock); -} - -OBJ_CLASS_INSTANCE( - mca_pml_uniq_proc_t, - opal_list_item_t, - mca_pml_uniq_proc_construct, - mca_pml_uniq_proc_destruct -); - diff --git a/ompi/mca/pml/uniq/pml_uniq_proc.h b/ompi/mca/pml/uniq/pml_uniq_proc.h deleted file mode 100644 index 42d4fcb709..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_proc.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_PROC_H -#define MCA_PML_PROC_H - -#include "opal/threads/mutex.h" -#include "ompi/communicator/communicator.h" -#include "ompi/group/group.h" -#include "ompi/proc/proc.h" - -/* This define has to move outside of this file. Maybe on some configuration file. - * Anyway by for, for the debugging purpose, here it's quite a safe place. - */ -#define PML_UNIQ_ACCEPT_NEXT_PTL 0 - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - /** - * A data structure associated with a ompi_proc_t that caches - * addressing/scheduling attributes for a specific PTL instance - * that can be used to reach the process. - */ - struct mca_ptl_proc_t { - struct mca_ptl_base_peer_t* ptl_peer; /**< PTL addressing info */ - struct mca_pml_uniq_ptl_t* ptl_base; /**< PML specific PTL info */ - mca_ptl_base_module_t *ptl; /**< PTL module */ - }; - typedef struct mca_ptl_proc_t mca_ptl_proc_t; - - /** - * Structure associated w/ ompi_proc_t that contains data specific - * to the PML. Note that this name is not PML specific. - */ - struct mca_pml_uniq_proc_t { - mca_pml_proc_t base; - mca_ptl_proc_t proc_ptl_first; /**< ptl for the first fragment */ -#if PML_UNIQ_ACCEPT_NEXT_PTL - mca_ptl_proc_t proc_ptl_next; /**< ptl for the remaining fragments */ -#endif /* PML_UNIQ_ACCEPT_NEXT_PTL */ - uint32_t proc_ptl_flags; /**< aggregate ptl flags */ - }; - typedef struct mca_pml_uniq_proc_t mca_pml_uniq_proc_t; - - - OMPI_COMP_EXPORT extern opal_class_t mca_pml_uniq_proc_t_class; - - - /** - * Return the mca_pml_proc_t instance cached in the communicators local group. 
- * - * @param comm Communicator - * @param rank Peer rank - * @return mca_pml_proc_t instance - */ - - static inline mca_pml_proc_t* mca_pml_uniq_proc_lookup_local(ompi_communicator_t* comm, int rank) - { - ompi_proc_t* proc = comm->c_local_group->grp_proc_pointers[rank]; - return proc->proc_pml; - } - - /** - * Return the mca_pml_proc_t instance cached on the communicators remote group. - * - * @param comm Communicator - * @param rank Peer rank - * @return mca_pml_proc_t instance - */ - - static inline mca_pml_proc_t* mca_pml_uniq_proc_lookup_remote(ompi_communicator_t* comm, int rank) - { - ompi_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank]; - return proc->proc_pml; - } - - /** - * Return the mca_ptl_peer_t instance corresponding to the process/ptl combination. - * - * @param comm Communicator - * @param rank Peer rank - * @return mca_pml_proc_t instance - */ - - static inline struct mca_ptl_base_peer_t* - mca_pml_uniq_proc_lookup_remote_peer( ompi_communicator_t* comm, - int rank, - struct mca_ptl_base_module_t* ptl) - { - ompi_proc_t* proc = comm->c_remote_group->grp_proc_pointers[rank]; - mca_pml_uniq_proc_t* proc_pml = (mca_pml_uniq_proc_t*) proc->proc_pml; - if( proc_pml->proc_ptl_first.ptl == ptl ) - return proc_pml->proc_ptl_first.ptl_peer; -#if PML_UNIQ_ACCEPT_NEXT_PTL - if( proc_pml->proc_ptl_next.ptl == ptl ) - return proc_pml->proc_ptl_next.ptl_peer; -#endif /* PML_UNIQ_ACCEPT_NEXT_PTL */ - return NULL; - } - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/pml/uniq/pml_uniq_progress.c b/ompi/mca/pml/uniq/pml_uniq_progress.c deleted file mode 100644 index 3dcd876683..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_progress.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_uniq.h" -#include "pml_uniq_sendreq.h" - - -int mca_pml_uniq_progress(void) -{ - mca_ptl_tstamp_t tstamp = 0; - size_t i; - int count = 0; - - /* - * Progress each of the PTL modules - */ - for(i=0; i 0) { - count += rc; - } - } - return count; -} - diff --git a/ompi/mca/pml/uniq/pml_uniq_ptl.c b/ompi/mca/pml/uniq/pml_uniq_ptl.c deleted file mode 100644 index 61bd9297c0..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_ptl.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_uniq_ptl.h" - -static void mca_pml_uniq_ptl_construct(mca_pml_uniq_ptl_t* ptl) -{ - OBJ_CONSTRUCT(&ptl->ptl_cache, opal_list_t); - OBJ_CONSTRUCT(&ptl->ptl_cache_lock, opal_mutex_t); - ptl->ptl = NULL; - ptl->ptl_cache_size = 0; - ptl->ptl_cache_alloc = 0; -} - -static void mca_pml_uniq_ptl_destruct(mca_pml_uniq_ptl_t* ptl) -{ - OBJ_DESTRUCT(&ptl->ptl_cache); - OBJ_DESTRUCT(&ptl->ptl_cache_lock); -} - -OBJ_CLASS_INSTANCE( - mca_pml_uniq_ptl_t, - opal_list_t, - mca_pml_uniq_ptl_construct, - mca_pml_uniq_ptl_destruct -); - diff --git a/ompi/mca/pml/uniq/pml_uniq_ptl.h b/ompi/mca/pml/uniq/pml_uniq_ptl.h deleted file mode 100644 index e2bdbdd290..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_ptl.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _MCA_PML_BASE_PTL_ -#define _MCA_PML_BASE_PTL_ - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "opal/threads/condition.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - -struct mca_pml_uniq_ptl_t { - opal_list_t ptl_cache; /**< cache of send requests */ - size_t ptl_cache_size; /**< maximum size of cache */ - size_t ptl_cache_alloc; /**< current number of allocated items */ - opal_mutex_t ptl_cache_lock; /**< lock for queue access */ - struct mca_ptl_base_module_t* ptl; /**< back pointer to ptl */ -}; -typedef struct mca_pml_uniq_ptl_t mca_pml_uniq_ptl_t; -typedef struct mca_pml_uniq_ptl_t mca_pml_base_ptl_t; - -OBJ_CLASS_DECLARATION(mca_pml_uniq_ptl_t); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif - diff --git a/ompi/mca/pml/uniq/pml_uniq_recvfrag.c b/ompi/mca/pml/uniq/pml_uniq_recvfrag.c deleted file mode 100644 index d07ea02ecd..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_recvfrag.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - */ - -#include "ompi_config.h" - -#include "ompi/mca/pml/pml.h" -#include "pml_uniq_recvfrag.h" -#include "pml_uniq_proc.h" - - -OMPI_DECLSPEC extern opal_class_t mca_ptl_base_recv_frag_t_class; - - -/** - * Called by the PTL to match attempt a match for new fragments. 
- * - * @param ptl (IN) The PTL pointer - * @param frag (IN) Receive fragment descriptor. - * @param header (IN) Header corresponding to the receive fragment. - * @return OMPI_SUCCESS or error status on failure. - */ -bool mca_pml_uniq_recv_frag_match( - mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag, - mca_ptl_base_match_header_t* header) -{ - bool matched; - bool matches = false; - opal_list_t matched_frags; - if((matched = mca_ptl_base_match(header, frag, &matched_frags, &matches)) == false) { - frag = (matches ? (mca_ptl_base_recv_frag_t*)opal_list_remove_first(&matched_frags) : NULL); - } - - while(NULL != frag) { - mca_ptl_base_module_t* ptl = frag->frag_base.frag_owner; - mca_ptl_base_recv_request_t *request = frag->frag_request; - mca_ptl_base_match_header_t *header = &frag->frag_base.frag_header.hdr_match; - - /* - * Initialize request status. - */ - request->req_recv.req_bytes_packed = header->hdr_msg_length; - request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = header->hdr_src; - request->req_recv.req_base.req_ompi.req_status.MPI_TAG = header->hdr_tag; - - /* - * If probe - signal request is complete - but don't notify PTL - */ - if(request->req_recv.req_base.req_type == MCA_PML_REQUEST_PROBE) { - - ptl->ptl_recv_progress( ptl, - request, - header->hdr_msg_length, - header->hdr_msg_length ); - matched = mca_pml_uniq_recv_frag_match( ptl, frag, header ); - - } else { - - /* if required - setup pointer to ptls peer */ - if (NULL == frag->frag_base.frag_peer) { - frag->frag_base.frag_peer = mca_pml_uniq_proc_lookup_remote_peer(request->req_recv.req_base.req_comm,header->hdr_src,ptl); - } - MCA_PML_UNIQ_RECV_MATCHED( ptl, frag ); - - }; - - /* process any additional fragments that arrived out of order */ - frag = (matches ? 
(mca_ptl_base_recv_frag_t*)opal_list_remove_first(&matched_frags) : NULL); - }; - return matched; -} - - diff --git a/ompi/mca/pml/uniq/pml_uniq_recvfrag.h b/ompi/mca/pml/uniq/pml_uniq_recvfrag.h deleted file mode 100644 index 2d4b25ba4f..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_recvfrag.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_UNIQ_RECVFRAG_H -#define MCA_PML_UNIQ_RECVFRAG_H - -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/datatype/convertor.h" - -/** - * Called by the PTL to match attempt a match for new fragments. - * - * @param ptl (IN) The PTL pointer - * @param frag (IN) Receive fragment descriptor. - * @param header (IN) Header corresponding to the receive fragment. - * @return OMPI_SUCCESS or error status on failure. - */ -bool mca_pml_uniq_recv_frag_match( - mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag, - mca_ptl_base_match_header_t* header -); - -#define MCA_PML_UNIQ_RECV_MATCHED( ptl, frag ) \ -do { \ - mca_pml_base_recv_request_t* _request = (mca_pml_base_recv_request_t*)(frag)->frag_request; \ - /* Now that we have the sender we can create the convertor. Additionally, we know */ \ - /* that the required convertor should start at the position zero as we just match */ \ - /* the first fragment. 
*/ \ - if( 0 != (_request)->req_bytes_packed ) { \ - (_request)->req_base.req_proc = ompi_comm_peer_lookup( \ - (_request)->req_base.req_comm, \ - frag->frag_base.frag_header.hdr_match.hdr_src); \ - ompi_convertor_copy_and_prepare_for_recv( \ - (_request)->req_base.req_proc->proc_convertor, \ - (_request)->req_base.req_datatype, \ - (_request)->req_base.req_count, \ - (_request)->req_base.req_addr, \ - &((_request)->req_convertor) ); \ - } \ - ptl->ptl_matched( (ptl), (frag) ); /* notify ptl of match */ \ -} while (0) -#endif - diff --git a/ompi/mca/pml/uniq/pml_uniq_recvreq.c b/ompi/mca/pml/uniq/pml_uniq_recvreq.c deleted file mode 100644 index 2603ca01eb..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_recvreq.c +++ /dev/null @@ -1,284 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_comm.h" -#include "pml_uniq_recvreq.h" -#include "pml_uniq_sendreq.h" -#include "pml_uniq_recvfrag.h" - -static mca_ptl_base_recv_frag_t* mca_pml_uniq_recv_request_match_specific_proc( - mca_ptl_base_recv_request_t* request, int proc); - - -static int mca_pml_uniq_recv_request_fini(struct ompi_request_t** request) -{ - MCA_PML_UNIQ_FINI(request); - return OMPI_SUCCESS; -} - -static int mca_pml_uniq_recv_request_free(struct ompi_request_t** request) -{ - MCA_PML_UNIQ_FREE(request); - return OMPI_SUCCESS; -} - - -static int mca_pml_uniq_recv_request_cancel(struct ompi_request_t* request, int complete) -{ - mca_pml_base_request_t* uniq_request = (mca_pml_base_request_t*)request; - ompi_communicator_t* ompi_comm = uniq_request->req_comm; - mca_pml_ptl_comm_t* pml_comm = (mca_pml_ptl_comm_t*)ompi_comm->c_pml_comm; - - if( true == request->req_complete ) { /* way to late to cancel this one */ - return OMPI_SUCCESS; - } - - /* The rest should be protected behind the match logic lock */ - OPAL_THREAD_LOCK(&pml_comm->c_matching_lock); - - if( OMPI_ANY_TAG == request->req_status.MPI_TAG ) { /* the match have not been already done */ - - if( uniq_request->req_peer == OMPI_ANY_SOURCE ) { - opal_list_remove_item( &(pml_comm->c_wild_receives), - (opal_list_item_t*)request ); - } else { - opal_list_remove_item( pml_comm->c_specific_receives + uniq_request->req_peer, - (opal_list_item_t*)request ); - } - } - - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); - - request->req_status._cancelled = true; - request->req_complete = true; /* mark it as completed so all the test/wait functions - * on this particular request will finish */ - /* Now we have a problem if we are in a multi-threaded environment. 
We should - * broadcast the condition on the request in order to allow the other threads - * to complete their test/wait functions. - */ - ompi_request_completed++; - if(ompi_request_waiting) { - opal_condition_broadcast(&ompi_request_cond); - } - return OMPI_SUCCESS; -} - -static void mca_pml_uniq_recv_request_construct(mca_pml_base_recv_request_t* request) -{ - request->req_base.req_type = MCA_PML_REQUEST_RECV; - request->req_base.req_ompi.req_fini = mca_pml_uniq_recv_request_fini; - request->req_base.req_ompi.req_free = mca_pml_uniq_recv_request_free; - request->req_base.req_ompi.req_cancel = mca_pml_uniq_recv_request_cancel; -} - -static void mca_pml_uniq_recv_request_destruct(mca_pml_base_recv_request_t* request) -{ -} - -OBJ_CLASS_INSTANCE( - mca_pml_uniq_recv_request_t, - mca_pml_base_recv_request_t, - mca_pml_uniq_recv_request_construct, - mca_pml_uniq_recv_request_destruct); - - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. - */ - -void mca_pml_uniq_recv_request_progress( - struct mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_request_t* req, - size_t bytes_received, - size_t bytes_delivered) -{ - OPAL_THREAD_LOCK(&ompi_request_lock); - req->req_bytes_received += bytes_received; - req->req_bytes_delivered += bytes_delivered; - if (req->req_bytes_received >= req->req_recv.req_bytes_packed) { - /* initialize request status */ - req->req_recv.req_base.req_ompi.req_status._count = req->req_bytes_delivered; - req->req_recv.req_base.req_pml_complete = true; - req->req_recv.req_base.req_ompi.req_complete = true; - ompi_request_completed++; - if(ompi_request_waiting) { - opal_condition_broadcast(&ompi_request_cond); - } - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); -} - - - -/* - * This routine is used to match a posted receive when the source process - * is specified. 
-*/ - -void mca_pml_uniq_recv_request_match_specific(mca_ptl_base_recv_request_t* request) -{ - ompi_communicator_t *comm = request->req_recv.req_base.req_comm; - mca_pml_ptl_comm_t* pml_comm = comm->c_pml_comm; - int req_peer = request->req_recv.req_base.req_peer; - mca_ptl_base_recv_frag_t* frag; - - /* check for a specific match */ - OPAL_THREAD_LOCK(&pml_comm->c_matching_lock); - - /* assign sequence number */ - request->req_recv.req_base.req_sequence = pml_comm->c_recv_seq++; - - if (opal_list_get_size(&pml_comm->c_unexpected_frags[req_peer]) > 0 && - (frag = mca_pml_uniq_recv_request_match_specific_proc(request, req_peer)) != NULL) { - mca_ptl_base_module_t* ptl = frag->frag_base.frag_owner; - /* setup pointer to ptls peer */ - if(NULL == frag->frag_base.frag_peer) - frag->frag_base.frag_peer = mca_pml_uniq_proc_lookup_remote_peer(comm,req_peer,ptl); - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); - if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || - (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { - MCA_PML_UNIQ_RECV_MATCHED( ptl, frag ); - } - return; /* match found */ - } - - /* We didn't find any matches. Record this irecv so we can match - * it when the message comes in. - */ - if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) { - opal_list_append(pml_comm->c_specific_receives+req_peer, (opal_list_item_t*)request); - } - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); -} - - -/* - * this routine is used to try and match a wild posted receive - where - * wild is determined by the value assigned to the source process -*/ - -void mca_pml_uniq_recv_request_match_wild(mca_ptl_base_recv_request_t* request) -{ - ompi_communicator_t *comm = request->req_recv.req_base.req_comm; - mca_pml_ptl_comm_t* pml_comm = comm->c_pml_comm; - int proc_count = comm->c_remote_group->grp_proc_count; - int proc; - - /* - * Loop over all the outstanding messages to find one that matches. 
- * There is an outer loop over lists of messages from each - * process, then an inner loop over the messages from the - * process. - */ - OPAL_THREAD_LOCK(&pml_comm->c_matching_lock); - - /* assign sequence number */ - request->req_recv.req_base.req_sequence = pml_comm->c_recv_seq++; - - for (proc = 0; proc < proc_count; proc++) { - mca_ptl_base_recv_frag_t* frag; - - /* continue if no frags to match */ - if (opal_list_get_size(&pml_comm->c_unexpected_frags[proc]) == 0) - continue; - - /* loop over messages from the current proc */ - if ((frag = mca_pml_uniq_recv_request_match_specific_proc(request, proc)) != NULL) { - mca_ptl_base_module_t* ptl = frag->frag_base.frag_owner; - /* if required - setup pointer to ptls peer */ - if(NULL == frag->frag_base.frag_peer) - frag->frag_base.frag_peer = mca_pml_uniq_proc_lookup_remote_peer(comm,proc,ptl); - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); - if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || - (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { - MCA_PML_UNIQ_RECV_MATCHED( ptl, frag ); - } - return; /* match found */ - } - } - - /* We didn't find any matches. Record this irecv so we can match to - * it when the message comes in. - */ - - if(request->req_recv.req_base.req_type != MCA_PML_REQUEST_IPROBE) - opal_list_append(&pml_comm->c_wild_receives, (opal_list_item_t*)request); - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); -} - - -/* - * this routine tries to match a posted receive. If a match is found, - * it places the request in the appropriate matched receive list. 
-*/ - -static mca_ptl_base_recv_frag_t* mca_pml_uniq_recv_request_match_specific_proc( - mca_ptl_base_recv_request_t* request, int proc) -{ - mca_pml_ptl_comm_t *pml_comm = request->req_recv.req_base.req_comm->c_pml_comm; - opal_list_t* unexpected_frags = pml_comm->c_unexpected_frags+proc; - mca_ptl_base_recv_frag_t* frag; - mca_ptl_base_match_header_t* header; - int tag = request->req_recv.req_base.req_tag; - - if( OMPI_ANY_TAG == tag ) { - for (frag = (mca_ptl_base_recv_frag_t*)opal_list_get_first(unexpected_frags); - frag != (mca_ptl_base_recv_frag_t*)opal_list_get_end(unexpected_frags); - frag = (mca_ptl_base_recv_frag_t*)opal_list_get_next(frag)) { - header = &(frag->frag_base.frag_header.hdr_match); - - /* check first frag - we assume that process matching has been done already */ - if( header->hdr_tag >= 0 ) { - goto find_fragment; - } - } - } else { - for (frag = (mca_ptl_base_recv_frag_t*)opal_list_get_first(unexpected_frags); - frag != (mca_ptl_base_recv_frag_t*)opal_list_get_end(unexpected_frags); - frag = (mca_ptl_base_recv_frag_t*)opal_list_get_next(frag)) { - header = &(frag->frag_base.frag_header.hdr_match); - - /* check first frag - we assume that process matching has been done already */ - if ( tag == header->hdr_tag ) { - /* we assume that the tag is correct from MPI point of view (ie. 
>= 0 ) */ - goto find_fragment; - } - } - } - return NULL; - find_fragment: - request->req_recv.req_bytes_packed = header->hdr_msg_length; - request->req_recv.req_base.req_ompi.req_status.MPI_TAG = header->hdr_tag; - request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = header->hdr_src; - - if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || - (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { - opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag); - frag->frag_request = request; - } else { - /* it's a probe, therefore report it's completion */ - mca_pml_uniq_recv_request_progress( NULL, request, header->hdr_msg_length, header->hdr_msg_length ); - } - return frag; -} - diff --git a/ompi/mca/pml/uniq/pml_uniq_recvreq.h b/ompi/mca/pml/uniq/pml_uniq_recvreq.h deleted file mode 100644 index 74c4628559..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_recvreq.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef OMPI_PML_UNIQ_RECV_REQUEST_H -#define OMPI_PML_UNIQ_RECV_REQUEST_H - -#include "pml_uniq.h" -#include "pml_uniq_proc.h" -#include "ompi/mca/ptl/base/ptl_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -typedef mca_pml_base_recv_request_t mca_pml_uniq_recv_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_uniq_recv_request_t); - - -/** - * Allocate a recv request from the modules free list. - * - * @param rc (OUT) OMPI_SUCCESS or error status on failure. - * @return Receive request. - */ -#define MCA_PML_UNIQ_RECV_REQUEST_ALLOC(recvreq, rc) \ - do { \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_GET(&mca_pml_uniq.uniq_recv_requests, item, rc); \ - recvreq = (mca_ptl_base_recv_request_t*)item; \ - } while(0) - -/** - * Initialize a recv request. - */ -#define MCA_PML_UNIQ_RECV_REQUEST_INIT( \ - request, \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent) \ -{ \ - MCA_PML_BASE_RECV_REQUEST_INIT( \ - (&(request)->req_recv), \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent \ - ); \ -} - -/** - * Return a recv request to the modules free list. - * - * @param request (IN) Receive request. - */ -#define MCA_PML_UNIQ_RECV_REQUEST_RETURN(request) \ - do { \ - MCA_PML_BASE_RECV_REQUEST_FINI( &((request)->req_recv) ); \ - OMPI_FREE_LIST_RETURN(&mca_pml_uniq.uniq_recv_requests, (opal_list_item_t*)(request)); \ - } while(0) - -/** - * Attempt to match the request against the unexpected fragment list - * for all source ranks w/in the communicator. - * - * @param request (IN) Request to match. - */ -void mca_pml_uniq_recv_request_match_wild(mca_ptl_base_recv_request_t* request); - -/** - * Attempt to match the request against the unexpected fragment list - * for a specific source rank. - * - * @param request (IN) Request to match. 
- */ -void mca_pml_uniq_recv_request_match_specific(mca_ptl_base_recv_request_t* request); - -/** - * Start an initialized request. - * - * @param request Receive request. - * @return OMPI_SUCESS or error status on failure. - */ -static inline int mca_pml_uniq_recv_request_start(mca_ptl_base_recv_request_t* request) -{ - /* init/re-init the request */ - request->req_bytes_received = 0; - request->req_bytes_delivered = 0; - request->req_recv.req_base.req_pml_complete = false; - request->req_recv.req_base.req_ompi.req_complete = false; - request->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; - /* always set the req_status.MPI_TAG to ANY_TAG before starting the request. This field - * is used on the cancel part in order to find out if the request has been matched or not. - */ - request->req_recv.req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG; - request->req_recv.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; - request->req_recv.req_base.req_ompi.req_status._cancelled = 0; - - /* attempt to match posted recv */ - if(request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { - mca_pml_uniq_recv_request_match_wild(request); - } else { - mca_pml_uniq_recv_request_match_specific(request); - } - return OMPI_SUCCESS; -} - -/** - * Update status of a recv request based on the completion status of - * the receive fragment. - * - * @param ptl (IN) The PTL pointer. - * @param request (IN) Receive request. - * @param bytes_received (IN) Bytes received from peer. - * @param bytes_delivered (IN) Bytes delivered to application. 
- */ -void mca_pml_uniq_recv_request_progress( - struct mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_request_t* request, - size_t bytes_received, - size_t bytes_delivered -); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* OMPI_PML_UNIQ_RECV_REQUEST_H */ - diff --git a/ompi/mca/pml/uniq/pml_uniq_sendreq.c b/ompi/mca/pml/uniq/pml_uniq_sendreq.c deleted file mode 100644 index 6b46dadf1a..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_sendreq.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "pml_uniq.h" -#include "pml_uniq_proc.h" -#include "pml_uniq_sendreq.h" -#include "pml_uniq_recvreq.h" - - - -static int mca_pml_uniq_send_request_fini(struct ompi_request_t** request) -{ - MCA_PML_UNIQ_FINI(request); - return OMPI_SUCCESS; -} - -static int mca_pml_uniq_send_request_free(struct ompi_request_t** request) -{ - MCA_PML_UNIQ_FREE(request); - return OMPI_SUCCESS; -} - -static int mca_pml_uniq_send_request_cancel(struct ompi_request_t* request, int complete) -{ - /* we dont cancel send requests by now */ - return OMPI_SUCCESS; -} - - -static void mca_pml_uniq_send_request_construct(mca_pml_base_send_request_t* req) -{ - req->req_base.req_type = MCA_PML_REQUEST_SEND; - req->req_base.req_ompi.req_fini = mca_pml_uniq_send_request_fini; - req->req_base.req_ompi.req_free = mca_pml_uniq_send_request_free; - req->req_base.req_ompi.req_cancel = mca_pml_uniq_send_request_cancel; -} - - -static void mca_pml_uniq_send_request_destruct(mca_pml_base_send_request_t* req) -{ -} - - -OBJ_CLASS_INSTANCE( - mca_pml_uniq_send_request_t, - mca_pml_base_send_request_t, - mca_pml_uniq_send_request_construct, - mca_pml_uniq_send_request_destruct); - - - -/** - * Schedule message delivery across potentially multiple PTLs. - * - * @param request (IN) Request to schedule - * @return status Error status - * - */ - - -int mca_pml_uniq_send_request_schedule(mca_ptl_base_send_request_t* req) -{ - int rc; - size_t bytes_remaining; - - /* - * Only allow one thread in this routine for a given request. - * However, we cannot block callers on a mutex, so simply keep track - * of the number of times the routine has been called and run through - * the scheduling logic once for every call. 
- */ - if(OPAL_THREAD_ADD32(&req->req_lock,1) == 1) { - mca_pml_uniq_proc_t* proc_pml = (mca_pml_uniq_proc_t*) - mca_pml_uniq_proc_lookup_remote( req->req_send.req_base.req_comm, - req->req_send.req_base.req_peer ); - -#if PML_UNIQ_ACCEPT_NEXT_PTL - mca_ptl_proc_t* ptl_proc = &(proc_pml->proc_ptl_next); -#else - mca_ptl_proc_t* ptl_proc = &(proc_pml->proc_ptl_first); -#endif /* PML_UNIQ_ACCEPT_NEXT_PTL */ - mca_ptl_base_module_t* ptl = ptl_proc->ptl; - /* allocate remaining bytes to PTLs */ - bytes_remaining = req->req_send.req_bytes_packed - req->req_offset; - /* The rest of the message will be scheduled over the same PTL (the one in the next field). We try - * to be PTL friendly here so we will respect the maximum size accepted by the PTL. - */ - if( bytes_remaining > ptl->ptl_max_frag_size) { - bytes_remaining = ptl->ptl_max_frag_size; - } - - rc = ptl->ptl_put(ptl, ptl_proc->ptl_peer, req, req->req_offset, bytes_remaining, 0); - if(rc == OMPI_SUCCESS) { - bytes_remaining = req->req_send.req_bytes_packed - req->req_offset; - } else { /* unable to complete send - queue for later */ - OPAL_THREAD_LOCK(&mca_pml_uniq.uniq_lock); - opal_list_append(&mca_pml_uniq.uniq_send_pending, (opal_list_item_t*)req); - OPAL_THREAD_UNLOCK(&mca_pml_uniq.uniq_lock); - req->req_lock = 0; - return OMPI_ERR_OUT_OF_RESOURCE; - } - OPAL_THREAD_ADD32(&req->req_lock,-1); - /* free the request if completed while in the scheduler */ - if (req->req_send.req_base.req_free_called && req->req_send.req_base.req_pml_complete) { - MCA_PML_UNIQ_FREE((ompi_request_t**)&req); - } - return OMPI_SUCCESS; - } - OPAL_THREAD_ADD32(&req->req_lock,-1); - return OMPI_SUCCESS; -} - - -/** - * Update the status of the send request to reflect the number of bytes - * "actually" sent (and acknowledged). This should be called by the - * lower layer PTL after the fragment is actually delivered and has been - * acknowledged (if required). 
Note that this routine should NOT be called - * directly by the PTL, a function pointer is setup on the PTL at init to - * enable upcalls into the PML w/out directly linking to a specific PML - * implementation. - */ - -void mca_pml_uniq_send_request_progress( - struct mca_ptl_base_module_t* ptl, - mca_ptl_base_send_request_t* req, - size_t bytes_sent) -{ - bool schedule = false; - - OPAL_THREAD_LOCK(&ompi_request_lock); - req->req_bytes_sent += bytes_sent; - if (req->req_bytes_sent >= req->req_send.req_bytes_packed) { - req->req_send.req_base.req_pml_complete = true; - if (req->req_send.req_base.req_ompi.req_complete == false) { - req->req_send.req_base.req_ompi.req_status.MPI_SOURCE = req->req_send.req_base.req_comm->c_my_rank; - req->req_send.req_base.req_ompi.req_status.MPI_TAG = req->req_send.req_base.req_tag; - req->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; - req->req_send.req_base.req_ompi.req_status._count = req->req_bytes_sent; - req->req_send.req_base.req_ompi.req_complete = true; - ompi_request_completed++; - if(ompi_request_waiting) { - opal_condition_broadcast(&ompi_request_cond); - } - } else if(req->req_send.req_base.req_free_called) { - /* don't free the request if in the scheduler */ - if(req->req_lock == 0) { - MCA_PML_UNIQ_FREE((ompi_request_t**)&req); - } - } else if (req->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { - mca_pml_base_bsend_request_fini((ompi_request_t*)req); - } - /* test to see if we have scheduled the entire request */ - } else if (req->req_offset < req->req_send.req_bytes_packed) { - schedule = true; - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - - /* schedule remaining fragments of this request */ - if(schedule) { - mca_pml_uniq_send_request_schedule(req); - } - - /* check for pending requests that need to be progressed */ - while(opal_list_get_size(&mca_pml_uniq.uniq_send_pending) != 0) { - OPAL_THREAD_LOCK(&mca_pml_uniq.uniq_lock); - req = 
(mca_ptl_base_send_request_t*)opal_list_remove_first(&mca_pml_uniq.uniq_send_pending); - OPAL_THREAD_UNLOCK(&mca_pml_uniq.uniq_lock); - if(req == NULL) - break; - if(mca_pml_uniq_send_request_schedule(req) != OMPI_SUCCESS) - break; - } -} - diff --git a/ompi/mca/pml/uniq/pml_uniq_sendreq.h b/ompi/mca/pml/uniq/pml_uniq_sendreq.h deleted file mode 100644 index 98a04058a3..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_sendreq.h +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef OMPI_PML_UNIQ_SEND_REQUEST_H -#define OMPI_PML_UNIQ_SEND_REQUEST_H - -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ompi/mca/ptl/base/ptl_base_comm.h" -#include "pml_uniq_proc.h" -#include "pml_uniq_ptl.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -typedef mca_pml_base_send_request_t mca_pml_uniq_send_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_uniq_send_request_t); - - -#define MCA_PML_UNIQ_SEND_REQUEST_ALLOC( \ - comm, \ - dst, \ - sendreq, \ - rc) \ -{ \ - mca_pml_uniq_proc_t *proc = \ - (mca_pml_uniq_proc_t*) mca_pml_uniq_proc_lookup_remote(comm,dst); \ - mca_pml_uniq_ptl_t* ptl_base; \ - \ - if(NULL == proc) { \ - return OMPI_ERR_OUT_OF_RESOURCE; \ - } \ - ptl_base = proc->proc_ptl_first.ptl_base; \ - /* \ - * check to see if there is a cache of send requests associated with \ 
- * this ptl - if so try the allocation from there. \ - */ \ - if(NULL != ptl_base) { \ - OPAL_THREAD_LOCK(&ptl_base->ptl_cache_lock); \ - sendreq = (mca_ptl_base_send_request_t*) \ - opal_list_remove_first(&ptl_base->ptl_cache); \ - if(NULL != sendreq) { \ - OPAL_THREAD_UNLOCK(&ptl_base->ptl_cache_lock); \ - rc = OMPI_SUCCESS; \ - } else if (ptl_base->ptl_cache_alloc < ptl_base->ptl_cache_size) { \ - /* \ - * allocate an additional request to the cache \ - */ \ - mca_ptl_base_module_t* ptl = ptl_base->ptl; \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_WAIT(&mca_pml_uniq.uniq_send_requests, item, rc); \ - sendreq = (mca_ptl_base_send_request_t*)item; \ - sendreq->req_ptl = ptl; \ - if(ptl->ptl_request_init(ptl, sendreq) == OMPI_SUCCESS) { \ - sendreq->req_cached = true; \ - ptl_base->ptl_cache_alloc++; \ - } \ - OPAL_THREAD_UNLOCK(&ptl_base->ptl_cache_lock); \ - } else { \ - /* \ - * take a request from the global pool \ - */ \ - opal_list_item_t* item; \ - OPAL_THREAD_UNLOCK(&ptl_base->ptl_cache_lock); \ - OMPI_FREE_LIST_WAIT(&mca_pml_uniq.uniq_send_requests, item, rc); \ - sendreq = (mca_ptl_base_send_request_t*)item; \ - sendreq->req_ptl = proc->proc_ptl_first.ptl; \ - } \ - \ - /* otherwise - take the allocation from the global list */ \ - } else { \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_WAIT(&mca_pml_uniq.uniq_send_requests, item, rc); \ - sendreq = (mca_ptl_base_send_request_t*)item; \ - sendreq->req_ptl = proc->proc_ptl_first.ptl; \ - } \ - /* update request to point to current peer */ \ - sendreq->req_peer = proc->proc_ptl_first.ptl_peer; \ - sendreq->req_send.req_base.req_proc = proc->base.proc_ompi; \ -} - -#define MCA_PML_UNIQ_SEND_REQUEST_INIT( request, \ - addr, \ - count, \ - datatype, \ - peer, \ - tag, \ - comm, \ - mode, \ - persistent) \ -{ \ - MCA_PML_BASE_SEND_REQUEST_INIT((&request->req_send), \ - addr, \ - count, \ - datatype, \ - peer, \ - tag, \ - comm, \ - mode, \ - persistent \ - ); \ -} - -#define 
MCA_PML_UNIQ_SEND_REQUEST_RETURN(sendreq) \ -{ \ - mca_ptl_base_module_t* ptl = (sendreq)->req_ptl; \ - mca_pml_uniq_ptl_t* ptl_base = (mca_pml_uniq_ptl_t*)ptl->ptl_base; \ - \ - /* Let the base handle the reference counts */ \ - MCA_PML_BASE_SEND_REQUEST_FINI( &((sendreq)->req_send) ); \ - \ - /* \ - * If there is a cache associated with the ptl - first attempt \ - * to return the send descriptor to the cache. \ - */ \ - if(NULL != ptl->ptl_base && (sendreq)->req_cached) { \ - OPAL_THREAD_LOCK(&ptl_base->ptl_cache_lock); \ - opal_list_prepend(&ptl_base->ptl_cache, \ - (opal_list_item_t*)sendreq); \ - OPAL_THREAD_UNLOCK(&ptl_base->ptl_cache_lock); \ - } else { \ - OMPI_FREE_LIST_RETURN( \ - &mca_pml_uniq.uniq_send_requests, (opal_list_item_t*)(sendreq)); \ - } \ -} - - -/** - * Start a send request. - */ -#define MCA_PML_UNIQ_SEND_REQUEST_START(req, rc) \ -{ \ - mca_ptl_base_module_t* ptl = req->req_ptl; \ - size_t first_fragment_size = ptl->ptl_first_frag_size; \ - int flags; \ - \ - req->req_offset = 0; \ - req->req_lock = 0; \ - req->req_bytes_sent = 0; \ - req->req_peer_match.lval = 0; \ - req->req_peer_addr.lval = 0; \ - req->req_peer_size = 0; \ - req->req_send.req_base.req_pml_complete = false; \ - req->req_send.req_base.req_ompi.req_complete = false; \ - req->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ - req->req_send.req_base.req_sequence = mca_pml_ptl_comm_send_sequence( \ - req->req_send.req_base.req_comm->c_pml_comm, req->req_send.req_base.req_peer); \ - \ - /* handle buffered send */ \ - if(req->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \ - mca_pml_base_bsend_request_start(&req->req_send.req_base.req_ompi); \ - } \ - \ - /* start the first fragment */ \ - if (first_fragment_size == 0 || \ - req->req_send.req_bytes_packed <= first_fragment_size) { \ - first_fragment_size = req->req_send.req_bytes_packed; \ - flags = (req->req_send.req_send_mode == MCA_PML_BASE_SEND_SYNCHRONOUS) ? 
\ - MCA_PTL_FLAGS_ACK : 0; \ - } else { \ - /* require match for first fragment of a multi-fragment */ \ - flags = MCA_PTL_FLAGS_ACK; \ - } \ - rc = ptl->ptl_send(ptl, req->req_peer, req, 0, first_fragment_size, \ - flags); \ -} - - -/** - * Schedule any data that was not delivered in the first fragment - * across the available PTLs. - */ -int mca_pml_uniq_send_request_schedule(mca_ptl_base_send_request_t* req); - - -/** - * Update the request to reflect the number of bytes delivered. If this - * was the first fragment - schedule the rest of the data. - */ -void mca_pml_uniq_send_request_progress( - struct mca_ptl_base_module_t* ptl, - mca_ptl_base_send_request_t* send_request, - size_t bytes_sent -); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/pml/uniq/pml_uniq_start.c b/ompi/mca/pml/uniq/pml_uniq_start.c deleted file mode 100644 index 97f41419b9..0000000000 --- a/ompi/mca/pml/uniq/pml_uniq_start.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_uniq.h" -#include "pml_uniq_recvreq.h" -#include "pml_uniq_sendreq.h" - - -int mca_pml_uniq_start(size_t count, ompi_request_t** requests) -{ - int rc; - size_t i; - for(i=0; ireq_ompi.req_state) { - case OMPI_REQUEST_INACTIVE: - if(pml_request->req_pml_complete == true) - break; - /* otherwise fall through */ - case OMPI_REQUEST_ACTIVE: { - - ompi_request_t *request; - OPAL_THREAD_LOCK(&ompi_request_lock); - if (pml_request->req_pml_complete == false) { - /* free request after it completes */ - pml_request->req_free_called = true; - } else { - /* can reuse the existing request */ - OPAL_THREAD_UNLOCK(&ompi_request_lock); - break; - } - - /* allocate a new request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: { - mca_pml_base_send_mode_t sendmode = - ((mca_pml_base_send_request_t*)pml_request)->req_send_mode; - rc = mca_pml_uniq_isend_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - sendmode, - pml_request->req_comm, - &request); - break; - } - case MCA_PML_REQUEST_RECV: - rc = mca_pml_uniq_irecv_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - pml_request->req_comm, - &request); - break; - default: - rc = OMPI_ERR_REQUEST; - break; - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - if(OMPI_SUCCESS != rc) - return rc; - pml_request = (mca_pml_base_request_t*)request; - requests[i] = request; - break; - } - default: - return OMPI_ERR_REQUEST; - } - - /* start the request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: - { - mca_ptl_base_send_request_t* sendreq = (mca_ptl_base_send_request_t*)pml_request; - MCA_PML_UNIQ_SEND_REQUEST_START(sendreq, rc); - if(rc != OMPI_SUCCESS) - return rc; - break; - } - case MCA_PML_REQUEST_RECV: - { - 
mca_ptl_base_recv_request_t* recvreq = (mca_ptl_base_recv_request_t*)pml_request; - if((rc = mca_pml_uniq_recv_request_start(recvreq)) != OMPI_SUCCESS) - return rc; - break; - } - default: - return OMPI_ERR_REQUEST; - } - } - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/ptl/Makefile.am b/ompi/mca/ptl/Makefile.am deleted file mode 100644 index af1920bcc0..0000000000 --- a/ompi/mca/ptl/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_ptl.la -libmca_ptl_la_SOURCES = - -# header setup -nobase_ompi_HEADERS = - -# local files -headers = ptl.h -libmca_ptl_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -nobase_ompi_HEADERS += $(headers) -ompidir = $(includedir)/openmpi/ompi/mca/ptl -else -ompidir = $(includedir) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/ptl/base/Makefile.am b/ompi/mca/ptl/base/Makefile.am deleted file mode 100644 index 36b4596fb8..0000000000 --- a/ompi/mca/ptl/base/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h \ - base/ptl_base_comm.h \ - base/ptl_base_fragment.h \ - base/ptl_base_header.h \ - base/ptl_base_match.h \ - base/ptl_base_recvfrag.h \ - base/ptl_base_recvreq.h \ - base/ptl_base_sendfrag.h \ - base/ptl_base_sendreq.h - -libmca_ptl_la_SOURCES += \ - base/ptl_base_close.c \ - base/ptl_base_comm.c \ - base/ptl_base_fragment.c \ - base/ptl_base_match.c \ - base/ptl_base_open.c \ - base/ptl_base_recvfrag.c \ - base/ptl_base_recvreq.c \ - base/ptl_base_select.c \ - base/ptl_base_sendfrag.c \ - base/ptl_base_sendreq.c diff --git a/ompi/mca/ptl/base/base.h b/ompi/mca/ptl/base/base.h deleted file mode 100644 index 54ffddb18f..0000000000 --- a/ompi/mca/ptl/base/base.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_BASE_H -#define MCA_PTL_BASE_H - -#include "ompi_config.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/mca.h" -#include "ompi/mca/ptl/ptl.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -struct mca_ptl_base_selected_module_t { - opal_list_item_t super; - - mca_ptl_base_component_t *pbsm_component; - mca_ptl_base_module_t *pbsm_module; -}; -typedef struct mca_ptl_base_selected_module_t mca_ptl_base_selected_module_t; - - -/* - * Global functions for MCA: overall PTL open and close - */ - -OMPI_DECLSPEC int mca_ptl_base_open(void); -OMPI_DECLSPEC int mca_ptl_base_select(bool enable_progress_threads, - bool enable_mpi_threads); -OMPI_DECLSPEC int mca_ptl_base_close(void); - - -/* - * Globals - */ -OMPI_DECLSPEC extern int mca_ptl_base_output; -OMPI_DECLSPEC extern char* mca_ptl_base_include; -OMPI_DECLSPEC extern char* mca_ptl_base_exclude; -OMPI_DECLSPEC extern opal_list_t mca_ptl_base_components_opened; -OMPI_DECLSPEC extern opal_list_t mca_ptl_base_components_initialized; -OMPI_DECLSPEC extern opal_list_t mca_ptl_base_modules_initialized; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* MCA_PTL_BASE_H */ diff --git a/ompi/mca/ptl/base/ptl_base_close.c b/ompi/mca/ptl/base/ptl_base_close.c deleted file mode 100644 index 2f10be9583..0000000000 --- a/ompi/mca/ptl/base/ptl_base_close.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "ompi/constants.h" -#include "opal/event/event.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/base.h" - -extern int mca_ptl_base_open_called; - -int mca_ptl_base_close(void) -{ - opal_list_item_t *item; - mca_ptl_base_selected_module_t *sm; - - if( 0 == mca_ptl_base_open_called ) return OMPI_ERROR; - mca_ptl_base_open_called = 0; - - /* disable event processing while cleaning up ptls */ - opal_event_disable(); - - /* Finalize all the ptl components and free their list items */ - - for (item = opal_list_remove_first(&mca_ptl_base_modules_initialized); - NULL != item; - item = opal_list_remove_first(&mca_ptl_base_modules_initialized)) { - sm = (mca_ptl_base_selected_module_t *) item; - - /* Blatently ignore the return code (what would we do to recover, - anyway? 
This component is going away, so errors don't matter - anymore) */ - - sm->pbsm_module->ptl_finalize(sm->pbsm_module); - free(sm); - } - - /* Close all remaining opened components (may be one if this is a - OMPI RTE program, or [possibly] multiple if this is ompi_info) */ - - if (0 != opal_list_get_size(&mca_ptl_base_components_initialized)) { - mca_base_components_close(mca_ptl_base_output, - &mca_ptl_base_components_initialized, NULL); - } - OBJ_DESTRUCT( &mca_ptl_base_components_initialized ); - OBJ_DESTRUCT( &mca_ptl_base_components_opened ); - - /* cleanup */ - if( NULL != mca_ptl_base_include ) { - free(mca_ptl_base_include); - mca_ptl_base_include = NULL; - } - if( NULL != mca_ptl_base_exclude ) { - free(mca_ptl_base_exclude); - mca_ptl_base_exclude = NULL; - } - - /* restore event processing */ - opal_event_enable(); - - /* All done */ - return OMPI_SUCCESS; -} diff --git a/ompi/mca/ptl/base/ptl_base_comm.c b/ompi/mca/ptl/base/ptl_base_comm.c deleted file mode 100644 index 8345de75cd..0000000000 --- a/ompi/mca/ptl/base/ptl_base_comm.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include - -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/base/ptl_base_comm.h" - -static void mca_pml_ptl_comm_construct(mca_pml_ptl_comm_t* comm); -static void mca_pml_ptl_comm_destruct(mca_pml_ptl_comm_t* comm); - - -opal_class_t mca_pml_ptl_comm_t_class = { - "mca_pml_ptl_comm_t", - OBJ_CLASS(opal_object_t), - (opal_construct_t)mca_pml_ptl_comm_construct, - (opal_destruct_t)mca_pml_ptl_comm_destruct -}; - - -static void mca_pml_ptl_comm_construct(mca_pml_ptl_comm_t* comm) -{ - OBJ_CONSTRUCT(&comm->c_wild_receives, opal_list_t); - OBJ_CONSTRUCT(&comm->c_matching_lock, opal_mutex_t); - comm->c_recv_seq = 0; -} - - -static void mca_pml_ptl_comm_destruct(mca_pml_ptl_comm_t* comm) -{ - free(comm->c_msg_seq); - free(comm->c_next_msg_seq); - free(comm->c_unexpected_frags); - free(comm->c_frags_cant_match); - free(comm->c_specific_receives); - OBJ_DESTRUCT(&comm->c_wild_receives); - OBJ_DESTRUCT(&comm->c_matching_lock); -} - - -int mca_pml_ptl_comm_init_size(mca_pml_ptl_comm_t* comm, size_t size) -{ - size_t i; - - /* send message sequence-number support - sender side */ - comm->c_msg_seq = malloc(sizeof(uint32_t) * size); - if(NULL == comm->c_msg_seq) - return OMPI_ERR_OUT_OF_RESOURCE; - memset(comm->c_msg_seq, 0, sizeof(uint32_t) * size); - - /* send message sequence-number support - receiver side */ - comm->c_next_msg_seq = malloc(sizeof(uint16_t) * size); - if(NULL == comm->c_next_msg_seq) - return OMPI_ERR_OUT_OF_RESOURCE; - memset(comm->c_next_msg_seq, 0, sizeof(uint16_t) * size); - - /* unexpected fragments queues */ - comm->c_unexpected_frags = malloc(sizeof(opal_list_t) * size); - if(NULL == comm->c_unexpected_frags) - return OMPI_ERR_OUT_OF_RESOURCE; - for(i=0; ic_unexpected_frags+i; - OBJ_CONSTRUCT(object, opal_list_t); - } - - /* out-of-order fragments queues */ - comm->c_frags_cant_match = 
malloc(sizeof(opal_list_t) * size); - if(NULL == comm->c_frags_cant_match) - return OMPI_ERR_OUT_OF_RESOURCE; - for(i=0; ic_frags_cant_match+i; - OBJ_CONSTRUCT(object, opal_list_t); - } - - /* queues of unmatched specific (source process specified) receives */ - comm->c_specific_receives = malloc(sizeof(opal_list_t) * size); - if(NULL == comm->c_specific_receives) - return OMPI_ERR_OUT_OF_RESOURCE; - for(i=0; ic_specific_receives+i; - OBJ_CONSTRUCT(object, opal_list_t); - } - return OMPI_SUCCESS; -} - - diff --git a/ompi/mca/ptl/base/ptl_base_comm.h b/ompi/mca/ptl/base/ptl_base_comm.h deleted file mode 100644 index 97a4f0723a..0000000000 --- a/ompi/mca/ptl/base/ptl_base_comm.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_COMM_H -#define MCA_PML_COMM_H - -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" -#include "ompi/mca/ptl/ptl.h" -#include "opal/class/opal_list.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -OMPI_DECLSPEC extern opal_class_t mca_pml_ptl_comm_t_class; - -/** - * Cached on ompi_communicator_t to hold queues/state - * used by the PML<->PTL interface for matching logic. 
- */ -struct mca_pml_comm_t { - opal_object_t super; - uint32_t *c_msg_seq; /**< send message sequence number - sender side */ - uint16_t *c_next_msg_seq; /**< send message sequence number - receiver side */ - mca_ptl_sequence_t c_recv_seq; /**< recv request sequence number - receiver side */ - opal_mutex_t c_matching_lock; /**< matching lock */ - opal_list_t *c_unexpected_frags; /**< unexpected fragment queues */ - opal_list_t *c_frags_cant_match; /**< out-of-order fragment queues */ - opal_list_t *c_specific_receives; /**< queues of unmatched specific (source process specified) receives */ - opal_list_t c_wild_receives; /**< queue of unmatched wild (source process not specified) receives */ -}; -typedef struct mca_pml_comm_t mca_pml_ptl_comm_t; - - -/** - * Initialize an instance of mca_pml_ptl_comm_t based on the communicator size. - * - * @param comm Instance of mca_pml_ptl_comm_t - * @param size Size of communicator - * @return OMPI_SUCCESS or error status on failure. - */ - -OMPI_DECLSPEC extern int mca_pml_ptl_comm_init_size(mca_pml_ptl_comm_t* comm, size_t size); - -/** - * Obtain the next sequence number (MPI) for a given destination rank. - * - * @param comm Instance of mca_pml_ptl_comm_t - * @param dst Rank of destination. - * @return Next available sequence number. - */ - -static inline mca_ptl_sequence_t mca_pml_ptl_comm_send_sequence(mca_pml_ptl_comm_t* comm, int dst) -{ - volatile int32_t *msg_seq = (volatile int32_t*)(comm->c_msg_seq+dst); - return (mca_ptl_sequence_t)OPAL_THREAD_ADD32(msg_seq, 1)-1; -} - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/ptl/base/ptl_base_fragment.c b/ompi/mca/ptl/base/ptl_base_fragment.c deleted file mode 100644 index ea234635c1..0000000000 --- a/ompi/mca/ptl/base/ptl_base_fragment.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -#include "ompi_config.h" -#include "opal/class/opal_list.h" -#include "ompi/mca/ptl/base/ptl_base_fragment.h" - -static void mca_ptl_base_frag_construct(mca_ptl_base_frag_t* frag); -static void mca_ptl_base_frag_destruct(mca_ptl_base_frag_t* frag); - - -opal_class_t mca_ptl_base_frag_t_class = { - "mca_ptl_base_frag_t", - OBJ_CLASS(opal_list_item_t), - (opal_construct_t) mca_ptl_base_frag_construct, - (opal_destruct_t) mca_ptl_base_frag_destruct -}; - -static void mca_ptl_base_frag_construct(mca_ptl_base_frag_t* frag) -{ - OBJ_CONSTRUCT(&frag->frag_convertor, ompi_convertor_t); -} - -static void mca_ptl_base_frag_destruct(mca_ptl_base_frag_t* frag) -{ -} - diff --git a/ompi/mca/ptl/base/ptl_base_fragment.h b/ompi/mca/ptl/base/ptl_base_fragment.h deleted file mode 100644 index 666208307b..0000000000 --- a/ompi/mca/ptl/base/ptl_base_fragment.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_BASE_FRAGMENT_H -#define MCA_PTL_BASE_FRAGMENT_H - -#include "opal/class/opal_list.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/datatype/datatype.h" -#include "ompi/datatype/convertor.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * Type of fragment - */ - -typedef enum { - MCA_PTL_FRAGMENT_SEND, - MCA_PTL_FRAGMENT_RECV -} mca_ptl_base_frag_type_t; - -/** - * Base type for fragment descriptors. - */ -struct mca_ptl_base_frag_t { - opal_list_item_t super; /**< allow the fragment to be placed on a list */ - mca_ptl_base_header_t frag_header; /**< header used for fragment matching */ - struct mca_ptl_base_module_t* frag_owner; /**< PTL that allocated this fragment */ - struct mca_ptl_base_peer_t* frag_peer; /**< PTL specific addressing info */ - void *frag_addr; /**< pointer into request buffer at fragment offset */ - size_t frag_size; /**< number of bytes available in request buffer */ - mca_ptl_base_frag_type_t frag_type; /**< fragment derived type */ - ompi_convertor_t frag_convertor; /**< datatype convertor for fragment packing/unpacking */ -}; -typedef struct mca_ptl_base_frag_t mca_ptl_base_frag_t; - -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_ptl_base_frag_t); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif - diff --git a/ompi/mca/ptl/base/ptl_base_header.h b/ompi/mca/ptl/base/ptl_base_header.h deleted file mode 100644 index 0cff62287d..0000000000 --- a/ompi/mca/ptl/base/ptl_base_header.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_BASE_HEADER_H -#define MCA_PTL_BASE_HEADER_H - -#include "ompi_config.h" -#include "ompi/mca/ptl/ptl.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#define MCA_PTL_HDR_TYPE_MATCH 1 -#define MCA_PTL_HDR_TYPE_RNDV 2 -#define MCA_PTL_HDR_TYPE_FRAG 3 -#define MCA_PTL_HDR_TYPE_ACK 4 -#define MCA_PTL_HDR_TYPE_NACK 5 -#define MCA_PTL_HDR_TYPE_GET 6 -#define MCA_PTL_HDR_TYPE_FIN 7 -#define MCA_PTL_HDR_TYPE_FIN_ACK 8 -#define MCA_PTL_HDR_TYPE_MAX 9 - -#define MCA_PTL_FLAGS_ACK 1 /* is an ack required */ -#define MCA_PTL_FLAGS_NBO 2 /* is the header in network byte order */ - - -/* - * Convert a 64 bit value to network byte order. - */ - -static inline uint64_t hton64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? */ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = htonl(w.l[1]); - r.l[1] = htonl(w.l[0]); - return r.ll; -} - - -/* - * Convert a 64 bit value from network to host byte order. - */ - -static inline uint64_t ntoh64(uint64_t val) -{ - union { uint64_t ll; - uint32_t l[2]; - } w, r; - - /* platform already in network byte order? 
*/ - if(htonl(1) == 1L) - return val; - w.ll = val; - r.l[0] = ntohl(w.l[1]); - r.l[1] = ntohl(w.l[0]); - return r.ll; -} - - -/** - * Common header attributes - must be first element in each header type - */ -struct mca_ptl_base_common_header_t { - uint8_t hdr_type; /**< type of envelope */ - uint8_t hdr_flags; /**< flags indicating how fragment should be processed */ -}; -typedef struct mca_ptl_base_common_header_t mca_ptl_base_common_header_t; - -#define MCA_PTL_BASE_COMMON_HDR_NTOH(h) - -#define MCA_PTL_BASE_COMMON_HDR_HTON(h) - -/** - * Header definition for the first fragment, contains the - * attributes required to match the corresponding posted receive. - */ -struct mca_ptl_base_match_header_t { - mca_ptl_base_common_header_t hdr_common; /**< common attributes */ - uint16_t hdr_contextid; /**< communicator index */ - int32_t hdr_src; /**< source rank */ - int32_t hdr_dst; /**< destination rank */ - int32_t hdr_tag; /**< user tag */ - uint64_t hdr_msg_length; /**< message length */ - uint16_t hdr_msg_seq; /**< message sequence number */ -}; -typedef struct mca_ptl_base_match_header_t mca_ptl_base_match_header_t; - -#define MCA_PTL_BASE_MATCH_HDR_NTOH(h) \ - do { \ - MCA_PTL_BASE_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_contextid = ntohs((h).hdr_contextid); \ - (h).hdr_src = ntohl((h).hdr_src); \ - (h).hdr_dst = ntohl((h).hdr_dst); \ - (h).hdr_tag = ntohl((h).hdr_tag); \ - (h).hdr_msg_length = ntoh64((h).hdr_msg_length); \ - (h).hdr_msg_seq = ntohs((h).hdr_msg_seq); \ - } while (0) - -#define MCA_PTL_BASE_MATCH_HDR_HTON(h) \ - do { \ - MCA_PTL_BASE_COMMON_HDR_HTON((h).hdr_common); \ - (h).hdr_contextid = htons((h).hdr_contextid); \ - (h).hdr_src = htonl((h).hdr_src); \ - (h).hdr_dst = htonl((h).hdr_dst); \ - (h).hdr_tag = htonl((h).hdr_tag); \ - (h).hdr_msg_length = hton64((h).hdr_msg_length); \ - (h).hdr_msg_seq = htons((h).hdr_msg_seq); \ - } while (0) - -/** - * Header definition for the first fragment when an acknowledgment - * is required. 
This could be the first fragment of a large message - * or a short message that requires an ack (synchronous). - */ -struct mca_ptl_base_rendezvous_header_t { - mca_ptl_base_match_header_t hdr_match; - uint64_t hdr_frag_length; /**< fragment length */ - ompi_ptr_t hdr_src_ptr; /**< pointer to source fragment - returned in ack */ -}; -typedef struct mca_ptl_base_rendezvous_header_t mca_ptl_base_rendezvous_header_t; - -#define MCA_PTL_BASE_RNDV_HDR_NTOH(h) \ - do { \ - MCA_PTL_BASE_MATCH_HDR_NTOH((h).hdr_match); \ - (h).hdr_frag_length = ntoh64((h).hdr_frag_length); \ - } while (0) - -#define MCA_PTL_BASE_RNDV_HDR_HTON(h) \ - do { \ - MCA_PTL_BASE_MATCH_HDR_HTON((h).hdr_match); \ - (h).hdr_frag_length = hton64((h).hdr_frag_length); \ - } while (0) - -/** - * Header for subsequent fragments. - */ -struct mca_ptl_base_frag_header_t { - mca_ptl_base_common_header_t hdr_common; /**< common attributes */ - uint64_t hdr_frag_length; /**< fragment length */ - uint64_t hdr_frag_offset; /**< offset into message */ - ompi_ptr_t hdr_src_ptr; /**< pointer to source fragment */ - ompi_ptr_t hdr_dst_ptr; /**< pointer to matched receive */ -}; -typedef struct mca_ptl_base_frag_header_t mca_ptl_base_frag_header_t; - -#define MCA_PTL_BASE_FRAG_HDR_NTOH(h) \ - do { \ - MCA_PTL_BASE_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_frag_length = ntoh64((h).hdr_frag_length); \ - (h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \ - } while (0) - -#define MCA_PTL_BASE_FRAG_HDR_HTON(h) \ - do { \ - MCA_PTL_BASE_COMMON_HDR_HTON((h).hdr_common); \ - (h).hdr_frag_length = hton64((h).hdr_frag_length); \ - (h).hdr_frag_offset = hton64((h).hdr_frag_offset); \ - } while (0) - - -/** - * Header used to acknowledgment outstanding fragment(s). 
- */ -struct mca_ptl_base_ack_header_t { - mca_ptl_base_common_header_t hdr_common; /**< common attributes */ - ompi_ptr_t hdr_src_ptr; /**< source fragment */ - ompi_ptr_t hdr_dst_match; /**< matched receive request */ - ompi_ptr_t hdr_dst_addr; /**< posted receive buffer */ - uint64_t hdr_dst_size; /**< size of posted buffer */ - /* sequence range? */ -}; -typedef struct mca_ptl_base_ack_header_t mca_ptl_base_ack_header_t; - -#define MCA_PTL_BASE_ACK_HDR_NTOH(h) \ - do { \ - MCA_PTL_BASE_COMMON_HDR_NTOH(h.hdr_common); \ - (h).hdr_dst_size = ntoh64((h).hdr_dst_size); \ - } while (0) - -#define MCA_PTL_BASE_ACK_HDR_HTON(h) \ - do { \ - MCA_PTL_BASE_COMMON_HDR_HTON((h).hdr_common); \ - (h).hdr_dst_size = hton64((h).hdr_dst_size); \ - } while (0) - -/** - * Union of defined header types. - */ -union mca_ptl_base_header_t { - mca_ptl_base_common_header_t hdr_common; - mca_ptl_base_match_header_t hdr_match; - mca_ptl_base_rendezvous_header_t hdr_rndv; - mca_ptl_base_frag_header_t hdr_frag; - mca_ptl_base_ack_header_t hdr_ack; -}; -typedef union mca_ptl_base_header_t mca_ptl_base_header_t; - - -#endif diff --git a/ompi/mca/ptl/base/ptl_base_match.c b/ompi/mca/ptl/base/ptl_base_match.c deleted file mode 100644 index 996f1d434d..0000000000 --- a/ompi/mca/ptl/base/ptl_base_match.c +++ /dev/null @@ -1,674 +0,0 @@ -/** @file */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include - -#include "opal/class/opal_list.h" -#include "opal/threads/mutex.h" -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_comm.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" -#include "ompi/mca/ptl/base/ptl_base_match.h" - - -/** - * Try and match the incoming message fragment to the list of - * "wild" receives - * - * @param frag_header Matching data from recived fragment (IN) - * - * @param pml_comm Pointer to the communicator structure used for - * matching purposes. (IN) - * - * @return Matched receive - * - * This routine assumes that the appropriate matching locks are - * set by the upper level routine. - */ - -#define MCA_PTL_BASE_CHECK_WILD_RECEIVES_FOR_MATCH(frag_header,pml_comm,return_match) \ -do { \ - /* local parameters */ \ - opal_list_t* wild_receives = &pml_comm->c_wild_receives; \ - mca_ptl_base_recv_request_t *wild_recv; \ - int frag_tag,recv_tag; \ - \ - /* initialization */ \ - frag_tag=frag_header->hdr_tag; \ - \ - /* \ - * Loop over the wild irecvs - no need to lock, the upper level \ - * locking is protecting from having other threads trying to \ - * change this list. 
\ - */ \ - for(wild_recv = (mca_ptl_base_recv_request_t *) \ - opal_list_get_first(wild_receives); \ - wild_recv != (mca_ptl_base_recv_request_t *) \ - opal_list_get_end(wild_receives); \ - wild_recv = (mca_ptl_base_recv_request_t *) \ - ((opal_list_item_t *)wild_recv)->opal_list_next) { \ - \ - recv_tag = wild_recv->req_recv.req_base.req_tag; \ - if ( \ - /* exact tag match */ \ - (frag_tag == recv_tag) || \ - /* wild tag match - negative tags (except for \ - * OMPI_ANY_TAG) are reserved for internal use, and will \ - * not be matched with OMPI_ANY_TAG */ \ - ( (recv_tag == OMPI_ANY_TAG) && (0 <= frag_tag) ) ) \ - \ - { \ - /* \ - * Mark that this is the matching irecv, and go to process it. \ - */ \ - return_match = wild_recv; \ - \ - /* remove this irecv from the postd wild ireceive list */ \ - opal_list_remove_item(wild_receives, \ - (opal_list_item_t *)wild_recv); \ -\ - /* found match - no need to continue */ \ - break; \ - } \ - } \ -} while(0) - - -/** - * Try and match the incoming message fragment to the list of - * "specific" receives - * - * @param frag_header Matching data from recived fragment (IN) - * - * @param pml_comm Pointer to the communicator structure used for - * matching purposes. (IN) - * - * @return Matched receive - * - * This routine assumes that the appropriate matching locks are - * set by the upper level routine. - */ -#define MCA_PTL_BASE_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(frag_header, pml_comm, return_match) \ -do { \ - /* local variables */ \ - opal_list_t* specific_receives = (pml_comm->c_specific_receives)+frag_src; \ - mca_ptl_base_recv_request_t *specific_recv; \ - int frag_src,recv_tag,frag_tag; \ - \ - /* initialization */ \ - frag_src = frag_header->hdr_src; \ - frag_tag=frag_header->hdr_tag; \ - \ - /* \ - * Loop over the specific irecvs. 
\ - */ \ - for(specific_recv = (mca_ptl_base_recv_request_t *) \ - opal_list_get_first(specific_receives); \ - specific_recv != (mca_ptl_base_recv_request_t *) \ - opal_list_get_end(specific_receives); \ - specific_recv = (mca_ptl_base_recv_request_t *) \ - ((opal_list_item_t *)specific_recv)->opal_list_next) { \ - /* \ - * Check for a match \ - */ \ - recv_tag = specific_recv->req_recv.req_base.req_tag; \ - if ( (frag_tag == recv_tag) || \ - ( (recv_tag == OMPI_ANY_TAG) && (0 <= frag_tag) ) ) { \ - \ - /* \ - * Match made \ - */ \ - return_match = specific_recv; \ - \ - /* remove descriptor from posted specific ireceive list */ \ - opal_list_remove_item(specific_receives, \ - (opal_list_item_t *)specific_recv); \ - \ - break; \ - } \ - } \ -} while(0) - -/** - * Try and match the incoming message fragment to the list of - * "wild" receives and "specific" receives. Used when both types - * of receives have been posted, i.e. when we need to coordinate - * between multiple lists to make sure ordered delivery occurs. - * - * @param frag_header Matching data from recived fragment (IN) - * - * @param pml_comm Pointer to the communicator structure used for - * matching purposes. (IN) - * - * @return Matched receive - * - * This routine assumes that the appropriate matching locks are - * set by the upper level routine. - */ - -#define MCA_PTL_BASE_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH( \ - frag_header, pml_comm, return_match) \ -do { \ - /* local variables */ \ - mca_ptl_base_recv_request_t *specific_recv, *wild_recv; \ - mca_ptl_sequence_t wild_recv_seq, specific_recv_seq; \ - int frag_src,frag_tag, wild_recv_tag, specific_recv_tag; \ - \ - /* initialization */ \ - frag_src = frag_header->hdr_src; \ - frag_tag=frag_header->hdr_tag; \ - \ - /* \ - * We know that when this is called, both specific and wild irecvs \ - * have been posted. 
\ - */ \ - specific_recv = (mca_ptl_base_recv_request_t *) \ - opal_list_get_first((pml_comm->c_specific_receives)+frag_src); \ - wild_recv = (mca_ptl_base_recv_request_t *) \ - opal_list_get_first(&(pml_comm->c_wild_receives)); \ - \ - specific_recv_seq = specific_recv->req_recv.req_base.req_sequence; \ - wild_recv_seq = wild_recv->req_recv.req_base.req_sequence; \ - \ - while (true) { \ - if (wild_recv_seq < specific_recv_seq) { \ - /* \ - * wild recv is earlier than the specific one. \ - */ \ - /* \ - * try and match \ - */ \ - wild_recv_tag = wild_recv->req_recv.req_base.req_tag; \ - if ( (frag_tag == wild_recv_tag) || \ - ( (wild_recv_tag == OMPI_ANY_TAG) && (0 <= frag_tag) ) ) { \ - /* \ - * Match made \ - */ \ - return_match=wild_recv; \ - \ - /* remove this recv from the wild receive queue */ \ - opal_list_remove_item(&(pml_comm->c_wild_receives), \ - (opal_list_item_t *)wild_recv); \ - break; \ - } \ - \ - /* \ - * No match, go to the next. \ - */ \ - wild_recv=(mca_ptl_base_recv_request_t *) \ - ((opal_list_item_t *)wild_recv)->opal_list_next; \ - \ - /* \ - * If that was the last wild one, just look at the \ - * rest of the specific ones. \ - */ \ - if (wild_recv == (mca_ptl_base_recv_request_t *) \ - opal_list_get_end(&(pml_comm->c_wild_receives)) ) \ - { \ - MCA_PTL_BASE_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(frag_header, pml_comm, return_match); \ - break; \ - } \ - \ - /* \ - * Get the sequence number for this recv, and go \ - * back to the top of the loop. \ - */ \ - wild_recv_seq = wild_recv->req_recv.req_base.req_sequence; \ - \ - } else { \ - /* \ - * specific recv is earlier than the wild one. 
\ - */ \ - specific_recv_tag=specific_recv->req_recv.req_base.req_tag; \ - if ( (frag_tag == specific_recv_tag) || \ - ( (specific_recv_tag == OMPI_ANY_TAG) && (0<=frag_tag)) ) \ - { \ - /* \ - * Match made \ - */ \ - return_match = specific_recv; \ - /* remove descriptor from specific receive list */ \ - opal_list_remove_item((pml_comm->c_specific_receives)+frag_src, \ - (opal_list_item_t *)specific_recv); \ - break; \ - } \ - \ - /* \ - * No match, go on to the next specific irecv. \ - */ \ - specific_recv = (mca_ptl_base_recv_request_t *) \ - ((opal_list_item_t *)specific_recv)->opal_list_next; \ - \ - /* \ - * If that was the last specific irecv, process the \ - * rest of the wild ones. \ - */ \ - if (specific_recv == (mca_ptl_base_recv_request_t *) \ - opal_list_get_end((pml_comm->c_specific_receives)+frag_src) ) \ - { \ - MCA_PTL_BASE_CHECK_WILD_RECEIVES_FOR_MATCH(frag_header, pml_comm, return_match); \ - break; \ - } \ - /* \ - * Get the sequence number for this recv, and go \ - * back to the top of the loop. \ - */ \ - specific_recv_seq = specific_recv->req_recv.req_base.req_sequence; \ - } \ - } \ -} while(0) - - -/* - * Specialized matching routines for internal use only. - */ - -static bool mca_ptl_base_check_cantmatch_for_match( - opal_list_t *additional_matches, - mca_pml_ptl_comm_t *pml_comm, int frag_src); - - -/** - * RCS/CTS receive side matching - * - * @param frag_header list of parameters needed for matching - * This list is also embeded in frag_desc, - * but this allows to save a memory copy when - * a match is made in this routine. (IN) - * @param frag_desc pointer to receive fragment which we want - * to match (IN/OUT). If a match is not made, - * frag_header is copied to frag_desc. - * @param match_made parameter indicating if we matched frag_desc/ - * frag_header (OUT) - * @param additional_matches if a match is made with frag_desc, we - * may be able to match fragments that previously - * have arrived out-of-order. 
If this is the - * case, the associated fragment descriptors are - * put on this list for further processing. (OUT) - * - * @return OMPI error code - * - * This routine is used to try and match a newly arrived message fragment - * to pre-posted receives. The following assumptions are made - * - fragments are received out of order - * - for long messages, e.g. more than one fragment, a RTS/CTS algorithm - * is used. - * - 2nd and greater fragments include a receive descriptor pointer - * - fragments may be dropped - * - fragments may be corrupt - * - this routine may be called simultaneously by more than one thread - */ -bool mca_ptl_base_match( - mca_ptl_base_match_header_t *frag_header, - mca_ptl_base_recv_frag_t *frag_desc, - opal_list_t *additional_matches, - bool* additional_match) -{ - /* local variables */ - uint16_t next_msg_seq_expected, frag_msg_seq; - - ompi_communicator_t *comm_ptr; - mca_ptl_base_recv_request_t *matched_receive = NULL; - mca_pml_ptl_comm_t *pml_comm; - int frag_src; - bool match_made=false; - - /* communicator pointer */ - comm_ptr=ompi_comm_lookup(frag_header->hdr_contextid); - pml_comm=(mca_pml_ptl_comm_t *)comm_ptr->c_pml_comm; - - /* source sequence number */ - frag_msg_seq = frag_header->hdr_msg_seq; - - /* get fragment communicator source rank */ - frag_src = frag_header->hdr_src; - - /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing - * a frag from the same message a match is made only once. - * Also, this prevents other posted receives (for a pair of - * end points) from being processed, and potentially "loosing" - * the fragment. 
- */ - OPAL_THREAD_LOCK(&pml_comm->c_matching_lock); - - /* get sequence number of next message that can be processed */ - next_msg_seq_expected = (uint16_t)*((pml_comm->c_next_msg_seq)+frag_src); - if (frag_msg_seq == next_msg_seq_expected) { - - /* - * This is the sequence number we were expecting, - * so we can try matching it to already posted - * receives. - */ - - /* We're now expecting the next sequence number. */ - (pml_comm->c_next_msg_seq[frag_src])++; - - /* - * figure out what sort of matching logic to use, if need to - * look only at "specific" receives, or "wild" receives, - * or if we need to traverse both sets at the same time. - */ - if (opal_list_get_size((pml_comm->c_specific_receives)+frag_src) == 0 ){ - /* - * There are only wild irecvs, so specialize the algorithm. - */ - MCA_PTL_BASE_CHECK_WILD_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - - } else if (opal_list_get_size(&(pml_comm->c_wild_receives)) == 0 ) { - /* - * There are only specific irecvs, so specialize the algorithm. - */ - MCA_PTL_BASE_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - } else { - /* - * There are some of each. - */ - MCA_PTL_BASE_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - } - - /* if match found, process data */ - if (matched_receive) { - - /* set flag indicating the input fragment was matched */ - match_made = true; - - /* associate the receive descriptor with the fragment descriptor */ - frag_desc->frag_request=matched_receive; - - /* set lenght of incoming message */ - matched_receive->req_recv.req_bytes_packed = frag_header->hdr_msg_length; - - /* - * update delivered sequence number information, if needed. 
- */ - if( (matched_receive->req_recv.req_base.req_type == MCA_PML_REQUEST_PROBE) ) { - /* Match a probe, rollback the next expected sequence number */ - (pml_comm->c_next_msg_seq[frag_src])--; - } - } else { - /* if no match found, place on unexpected queue */ - opal_list_append( ((pml_comm->c_unexpected_frags)+frag_src), - (opal_list_item_t *)frag_desc ); - } - - /* - * Now that new message has arrived, check to see if - * any fragments on the c_c_frags_cant_match list - * may now be used to form new matchs - */ - if (0 < opal_list_get_size((pml_comm->c_frags_cant_match)+frag_src)) { - - *additional_match = mca_ptl_base_check_cantmatch_for_match(additional_matches,pml_comm,frag_src); - - } - - } else { - - /* - * This message comes after the next expected, so it - * is ahead of sequence. Save it for later. - */ - opal_list_append( ((pml_comm->c_frags_cant_match)+frag_src), - (opal_list_item_t *)frag_desc); - } - - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); - return match_made; -} - - -/** - * Scan the list of frags that came in ahead of time to see if any - * can be processed at this time. If they can, try and match the - * frags. - * - * @param additional_matches List to hold new matches with fragments - * from the c_frags_cant_match list. (IN/OUT) - * - * @param pml_comm Pointer to the communicator structure used for - * matching purposes. (IN) - * - * This routine assumes that the appropriate matching locks are - * set by the upper level routine. - */ - -static bool mca_ptl_base_check_cantmatch_for_match(opal_list_t *additional_matches, - mca_pml_ptl_comm_t *pml_comm, int frag_src) -{ - /* local parameters */ - int match_found; - uint16_t next_msg_seq_expected, frag_seq; - mca_ptl_base_recv_frag_t *frag_desc; - mca_ptl_base_recv_request_t *matched_receive = NULL; - bool match_made = false; - - /* - * Loop over all the out of sequence messages. No ordering is assumed - * in the c_frags_cant_match list. 
- */ - - match_found = 1; - while ((0 < opal_list_get_size((pml_comm->c_frags_cant_match)+frag_src)) && - match_found) { - - /* initialize match flag for this search */ - match_found = 0; - - /* get sequence number of next message that can be processed */ - next_msg_seq_expected = *((pml_comm->c_next_msg_seq)+frag_src); - - /* search the list for a fragment from the send with sequence - * number next_msg_seq_expected - */ - for(frag_desc = (mca_ptl_base_recv_frag_t *) - opal_list_get_first((pml_comm->c_frags_cant_match)+frag_src); - frag_desc != (mca_ptl_base_recv_frag_t *) - opal_list_get_end((pml_comm->c_frags_cant_match)+frag_src); - frag_desc = (mca_ptl_base_recv_frag_t *) - opal_list_get_next(frag_desc)) - { - /* - * If the message has the next expected seq from that proc... - */ - frag_seq=frag_desc->frag_base.frag_header.hdr_match.hdr_msg_seq; - if (frag_seq == next_msg_seq_expected) { - mca_ptl_base_match_header_t* frag_header = - &frag_desc->frag_base.frag_header.hdr_match; - - /* We're now expecting the next sequence number. */ - (pml_comm->c_next_msg_seq[frag_src])++; - - /* signal that match was made */ - match_found = 1; - - /* - * remove frag_desc from list - */ - opal_list_remove_item((pml_comm->c_frags_cant_match)+frag_src, - (opal_list_item_t *)frag_desc); - - /* - * figure out what sort of matching logic to use, if need to - * look only at "specific" receives, or "wild" receives, - * or if we need to traverse both sets at the same time. - */ - frag_src = frag_header->hdr_src; - if (opal_list_get_size((pml_comm->c_specific_receives)+frag_src) == 0 ) { - /* - * There are only wild irecvs, so specialize the algorithm. - */ - MCA_PTL_BASE_CHECK_WILD_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - } else if (opal_list_get_size(&(pml_comm->c_wild_receives)) == 0 ) { - /* - * There are only specific irecvs, so specialize the algorithm. 
- */ - MCA_PTL_BASE_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - } else { - /* - * There are some of each. - */ - MCA_PTL_BASE_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - } - - /* if match found, process data */ - if (matched_receive) { - - /* associate the receive descriptor with the fragment - * descriptor */ - frag_desc->frag_request=matched_receive; - - /* add this fragment descriptor to the list of - * descriptors to be processed later - */ - if(match_made == false) { - match_made = true; - OBJ_CONSTRUCT(additional_matches, opal_list_t); - } - opal_list_append(additional_matches, (opal_list_item_t *)frag_desc); - - } else { - - /* if no match found, place on unexpected queue */ - opal_list_append( ((pml_comm->c_unexpected_frags)+frag_src), - (opal_list_item_t *)frag_desc); - - } - - /* c_frags_cant_match is not an ordered list, so exit loop - * and re-start search for next sequence number */ - break; - - } /* end if (frag_seq == next_msg_seq_expected) */ - - } /* end for (frag_desc) loop */ - - } /* end while loop */ - - return match_made; -} - -/** - * RCS/CTS receive side matching - * - * @param frag_header list of parameters needed for matching - * This list is also embeded in frag_desc, - * but this allows to save a memory copy when - * a match is made in this routine. (IN) - * @param frag_desc pointer to receive fragment which we want - * to match (IN/OUT). If a match is not made, - * frag_header is copied to frag_desc. - * @param match_made parameter indicating if we matched frag_desc/ - * frag_header (OUT) - * @return indication if match was made or not. - * - * This routine is used to try and match a newly arrived message fragment - * to pre-posted receives. The following assumptions are made - * - fragments are received in order, so no explicit sequence - * tracking is needed. - * - for long messages, e.g. more than one fragment, a RTS/CTS algorithm - * is used. 
- * - 2nd and greater fragments include a receive descriptor pointer - * - this routine may be called simoultaneously by more than one thread - * - * On return, if match is made: - * neither the fragment, nor the matched receive descriptor - * are on any list - * if match is not made: - * The fragment is placed on the unexpected fragment list - */ -bool mca_ptl_base_match_in_order_network_delivery( - mca_ptl_base_match_header_t *frag_header, - struct mca_ptl_base_recv_frag_t *frag_desc) -{ - /* local variables */ - ompi_communicator_t *comm_ptr; - mca_ptl_base_recv_request_t *matched_receive = NULL; - mca_pml_ptl_comm_t *pml_comm; - int frag_src; - - bool match_made=false; - - /* communicator pointer */ - comm_ptr=ompi_comm_lookup(frag_header->hdr_contextid); - pml_comm=(mca_pml_ptl_comm_t *)comm_ptr->c_pml_comm; - - /* get fragment communicator source rank */ - frag_src = frag_header->hdr_src; - - /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing - * a frag from the same message a match is made only once. - * Also, this prevents other posted receives (for a pair of - * end points) from being processed, and potentially "loosing" - * the fragment. - */ - OPAL_THREAD_LOCK(&pml_comm->c_matching_lock); - - /* - * figure out what sort of matching logic to use, if need to - * look only at "specific" receives, or "wild" receives, - * or if we need to traverse both sets at the same time. - */ - if (opal_list_get_size((pml_comm->c_specific_receives)+frag_src) == 0 ){ - /* - * There are only wild irecvs, so specialize the algorithm. - */ - MCA_PTL_BASE_CHECK_WILD_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - - } else if (opal_list_get_size(&(pml_comm->c_wild_receives)) == 0 ) { - /* - * There are only specific irecvs, so specialize the algorithm. - */ - MCA_PTL_BASE_CHECK_SPECIFIC_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - } else { - /* - * There are some of each. 
- */ - MCA_PTL_BASE_CHECK_SPECIFIC_AND_WILD_RECEIVES_FOR_MATCH(frag_header, pml_comm, matched_receive); - } - - /* if match found, process data */ - if (matched_receive) { - /* set flag indicating the input fragment was matched */ - match_made=true; - - /* associate the receive descriptor with the fragment descriptor */ - frag_desc->frag_request=matched_receive; - - /* set lenght of incoming message */ - matched_receive->req_recv.req_bytes_packed=frag_header->hdr_msg_length; - - } else { - /* if no match found, place on unexpected queue */ - opal_list_append( ((pml_comm->c_unexpected_frags)+frag_src), - (opal_list_item_t *)frag_desc); - } - - - OPAL_THREAD_UNLOCK(&pml_comm->c_matching_lock); - return match_made; -} diff --git a/ompi/mca/ptl/base/ptl_base_match.h b/ompi/mca/ptl/base/ptl_base_match.h deleted file mode 100644 index 58130aecca..0000000000 --- a/ompi/mca/ptl/base/ptl_base_match.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_BASE_MATCH_H -#define MCA_PTL_BASE_MATCH_H -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -struct mca_ptl_base_recv_frag_t; - -/** - * RCS/CTS receive side matching - * Match incoming fragments against posted receives. Out of order - * delivery. - * - * @param frag_header (IN) Header of received fragment. - * @param frag_desc (IN) Received fragment descriptor. 
- * @param match_made (OUT) Flag indicating wether a match was made. - * @param additional_matches (OUT) List of additional matches - * @return OMPI_SUCCESS or error status on failure. - */ -OMPI_DECLSPEC bool mca_ptl_base_match( - mca_ptl_base_match_header_t *frag_header, - struct mca_ptl_base_recv_frag_t *frag_desc, - opal_list_t *additional_matches, - bool* additional_matched); - -/** - * RCS/CTS receive side matching - * - * @param frag_header list of parameters needed for matching - * This list is also embeded in frag_desc, - * but this allows to save a memory copy when - * a match is made in this routine. (IN) - * @param frag_desc pointer to receive fragment which we want - * to match (IN/OUT). If a match is not made, - * frag_header is copied to frag_desc. - * @param match_made parameter indicating if we matched frag_desc/ - * frag_header (OUT) - * @return indication if match was made or not. - * - * This routine is used to try and match a newly arrived message fragment - * to pre-posted receives. The following assumptions are made - * - fragments are received in order, so no explicit sequence - * tracking is needed. - * - for long messages, e.g. more than one fragment, a RTS/CTS algorithm - * is used. - * - 2nd and greater fragments include a receive descriptor pointer - * - this routine may be called simoultaneously by more than one thread - */ -OMPI_DECLSPEC bool mca_ptl_base_match_in_order_network_delivery( - mca_ptl_base_match_header_t *frag_header, - struct mca_ptl_base_recv_frag_t *frag_desc); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* MCA_PTL_BASE_MATCH_H */ - diff --git a/ompi/mca/ptl/base/ptl_base_open.c b/ompi/mca/ptl/base/ptl_base_open.c deleted file mode 100644 index f8d783b192..0000000000 --- a/ompi/mca/ptl/base/ptl_base_open.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include - -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/base.h" - - -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. - */ - -#include "ompi/mca/ptl/base/static-components.h" - - -/* - * Global variables - */ -int mca_ptl_base_output = -1; -char* mca_ptl_base_include = NULL; -char* mca_ptl_base_exclude = NULL; -opal_list_t mca_ptl_base_components_opened; -opal_list_t mca_ptl_base_components_initialized; -opal_list_t mca_ptl_base_modules_initialized; -int mca_ptl_base_open_called = 0; - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. 
- */ -int mca_ptl_base_open(void) -{ - - if( 0 != mca_ptl_base_open_called ) return OMPI_SUCCESS; - mca_ptl_base_open_called = 1; - - /* Open up all available components */ - if (OMPI_SUCCESS != - mca_base_components_open("ptl", 0, mca_ptl_base_static_components, - &mca_ptl_base_components_opened, true)) { - return OMPI_ERROR; - } - - /* Initialize the list containing all the PTL's where the init function has been called */ - OBJ_CONSTRUCT( &mca_ptl_base_components_initialized, opal_list_t ); - - /* Initialize the list so that in mca_ptl_base_close(), we can - iterate over it (even if it's empty, as in the case of - ompi_info) */ - - OBJ_CONSTRUCT(&mca_ptl_base_modules_initialized, opal_list_t); - - /* register parameters */ - mca_base_param_lookup_string( - mca_base_param_register_string("ptl","base","include",NULL,NULL), &mca_ptl_base_include); - mca_base_param_lookup_string( - mca_base_param_register_string("ptl","base","exclude",NULL,NULL), &mca_ptl_base_exclude); - - /* All done */ - return OMPI_SUCCESS; -} diff --git a/ompi/mca/ptl/base/ptl_base_recvfrag.c b/ompi/mca/ptl/base/ptl_base_recvfrag.c deleted file mode 100644 index f7e6a74d40..0000000000 --- a/ompi/mca/ptl/base/ptl_base_recvfrag.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -#include "ompi_config.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/mca/ptl/base/ptl_base_match.h" - -static void mca_ptl_base_recv_frag_construct(mca_ptl_base_recv_frag_t* frag); -static void mca_ptl_base_recv_frag_destruct(mca_ptl_base_recv_frag_t* frag); - - -OBJ_CLASS_INSTANCE( - mca_ptl_base_recv_frag_t, - mca_ptl_base_frag_t, - mca_ptl_base_recv_frag_construct, - mca_ptl_base_recv_frag_destruct -); - - -void mca_ptl_base_recv_frag_construct(mca_ptl_base_recv_frag_t* frag) -{ - frag->frag_base.frag_type = MCA_PTL_FRAGMENT_RECV; -} - -void mca_ptl_base_recv_frag_destruct(mca_ptl_base_recv_frag_t* frag) -{ -} - diff --git a/ompi/mca/ptl/base/ptl_base_recvfrag.h b/ompi/mca/ptl/base/ptl_base_recvfrag.h deleted file mode 100644 index 98d1ea3034..0000000000 --- a/ompi/mca/ptl/base/ptl_base_recvfrag.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
/**
 * @file
 *
 * Declaration of the base receive fragment descriptor shared by PTL
 * implementations.
 */
#ifndef MCA_PTL_BASE_RECVFRAG_H
#define MCA_PTL_BASE_RECVFRAG_H

#include "ompi/mca/ptl/ptl.h"
#include "ompi/mca/ptl/base/ptl_base_recvreq.h"
#include "ompi/mca/ptl/base/ptl_base_fragment.h"
#include "ompi/mca/ptl/base/ptl_base_match.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/* Class descriptor for mca_ptl_base_recv_frag_t. */
OMPI_DECLSPEC extern opal_class_t mca_ptl_base_recv_frag_t_class;

/**
 * Base type for receive fragment descriptors.
 *
 * frag_base is the first member, so a pointer to this structure can be
 * used where a pointer to the base fragment type is expected.
 */
struct mca_ptl_base_recv_frag_t {
    mca_ptl_base_frag_t frag_base; /**< base fragment descriptor */
    mca_ptl_base_recv_request_t *frag_request; /**< matched posted receive */
    bool frag_is_buffered; /**< does fragment need to be unpacked into users buffer */
};
typedef struct mca_ptl_base_recv_frag_t mca_ptl_base_recv_frag_t;

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

#endif

#include "ompi/mca/ptl/base/ptl_base_recvreq.h"


/* Class descriptor for the base receive request: derives from
 * mca_pml_base_recv_request_t and has no constructor or destructor. */
OBJ_CLASS_INSTANCE(
    mca_ptl_base_recv_request_t,
    mca_pml_base_recv_request_t,
    NULL,
    NULL
);
/**
 * @file
 *
 * Declaration of the base receive request type used by PTLs.
 */
#ifndef MCA_PTL_BASE_RECV_REQUEST_H
#define MCA_PTL_BASE_RECV_REQUEST_H

#include "ompi/mca/ptl/ptl.h"
#include "ompi/mca/pml/base/pml_base_recvreq.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

/**
 * Base type for recv requests
 *
 * Extends the PML base receive request (first member) with two byte
 * counters.
 */
struct mca_ptl_base_recv_request_t {
   mca_pml_base_recv_request_t req_recv; /**< PML base receive request */
   size_t req_bytes_received;  /**< number of bytes received from network */
   size_t req_bytes_delivered; /**< number of bytes delivered to user */
};
typedef struct mca_ptl_base_recv_request_t mca_ptl_base_recv_request_t;


/* Class declaration matching the OBJ_CLASS_INSTANCE for this type. */
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_ptl_base_recv_request_t);


#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/util/argv.h" -#include "opal/util/show_help.h" -#include "opal/util/output.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "orte/runtime/runtime.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/base.h" - -/** - * Function for weeding out ptl components that don't want to run. - * - * Call the init function on all available components to find out if - * they want to run. Select all components that don't fail. Failing - * components will be closed and unloaded. The selected modules will - * be returned to the caller in a opal_list_t. - */ -int mca_ptl_base_select(bool enable_progress_threads, - bool enable_mpi_threads) -{ - int i, num_ptls; - opal_list_item_t *item; - opal_list_t* useless = OBJ_NEW(opal_list_t); - mca_base_component_list_item_t *cli; - mca_ptl_base_component_t *component; - mca_ptl_base_module_t **modules; - mca_ptl_base_selected_module_t *sm; - - char** include = opal_argv_split(mca_ptl_base_include, ','); - char** exclude = opal_argv_split(mca_ptl_base_exclude, ','); - - /* Traverse the list of opened modules; call their init - functions. 
*/ - - while( NULL != (item = opal_list_remove_first(&mca_ptl_base_components_opened)) ) { - bool keep_me = true; - - cli = (mca_base_component_list_item_t *) item; - component = (mca_ptl_base_component_t *) cli->cli_component; - - /* if there is an include list - item must be in the list to be included */ - if ( NULL != include ) { - char** argv = include; - keep_me = false; /* all PTLs not in the include list cannot be selected */ - while(argv && *argv) { - if(strcmp(component->ptlm_version.mca_component_name,*argv) == 0) { - keep_me = true; - break; - } - argv++; - } - /* otherwise - check the exclude list to see if this item has been specifically excluded */ - } else if ( NULL != exclude ) { - char** argv = exclude; - keep_me = true; /* all PTL's not in except list should be keeped */ - while(argv && *argv) { - if(strcmp(component->ptlm_version.mca_component_name,*argv) == 0) { - keep_me = false; - break; - } - argv++; - } - } - if( keep_me == false) { - opal_list_append( useless, item ); - continue; - } - - opal_output_verbose(10, mca_ptl_base_output, - "select: initializing %s component %s", - component->ptlm_version.mca_type_name, - component->ptlm_version.mca_component_name); - if (NULL == component->ptlm_init) { - opal_output_verbose(10, mca_ptl_base_output, - "select: no init function; ignoring component"); - opal_list_append( useless, item ); - continue; - } else { - modules = component->ptlm_init(&num_ptls, enable_progress_threads, - enable_mpi_threads); - - /* If the component didn't initialize, remove it from the opened - list and remove it from the component repository */ - - if (NULL == modules) { - opal_output_verbose( 10, mca_ptl_base_output, - "select: %s PTL init returned failure", - component->ptlm_version.mca_component_name); - opal_list_append( useless, item ); - continue; - } - - /* Otherwise, it initialized properly. Save it. 
*/ - - else { - opal_output_verbose(10, mca_ptl_base_output, - "select: init returned success"); - opal_list_append( &mca_ptl_base_components_initialized, item ); - for (i = 0; i < num_ptls; ++i) { - sm = malloc(sizeof(mca_ptl_base_selected_module_t)); - if (NULL == sm) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - OBJ_CONSTRUCT(sm, opal_list_item_t); - sm->pbsm_component = component; - sm->pbsm_module = modules[i]; - opal_list_append(&mca_ptl_base_modules_initialized, - (opal_list_item_t*) sm); - } - free(modules); - } - } - } - - /* All useless components have to be cleanly removed */ - mca_base_components_close( mca_ptl_base_output, useless, NULL ); - OBJ_RELEASE( useless ); - - opal_argv_free( include ); - opal_argv_free( exclude ); - /* Finished querying all components. Check for the bozo case. */ - if (0 == opal_list_get_size(&mca_ptl_base_modules_initialized)) { - opal_show_help("help-mca-base.txt", "find-available:none-found", true, - "ptl"); - orte_abort(1, ""); - } - - /* All done */ - return OMPI_SUCCESS; -} diff --git a/ompi/mca/ptl/base/ptl_base_sendfrag.c b/ompi/mca/ptl/base/ptl_base_sendfrag.c deleted file mode 100644 index 0fb83a9947..0000000000 --- a/ompi/mca/ptl/base/ptl_base_sendfrag.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
static void mca_ptl_base_send_frag_construct(mca_ptl_base_send_frag_t* frag);
static void mca_ptl_base_send_frag_destruct(mca_ptl_base_send_frag_t* frag);


/* Class descriptor for send fragments, initialised positionally: class
 * name string, parent class (mca_ptl_base_frag_t), constructor, destructor. */
opal_class_t mca_ptl_base_send_frag_t_class = {
    "mca_ptl_base_send_frag_t",
    OBJ_CLASS(mca_ptl_base_frag_t),
    (opal_construct_t) mca_ptl_base_send_frag_construct,
    (opal_destruct_t) mca_ptl_base_send_frag_destruct
};


/* Constructor: tags the embedded base fragment as a send fragment. */
static void mca_ptl_base_send_frag_construct(mca_ptl_base_send_frag_t* frag)
{
    frag->frag_base.frag_type = MCA_PTL_FRAGMENT_SEND;
}

/* Destructor: nothing to release. */
static void mca_ptl_base_send_frag_destruct(mca_ptl_base_send_frag_t* frag)
{
}
/**
 * @file
 *
 * Declaration of the base send fragment descriptor shared by PTL
 * implementations.
 */
#ifndef MCA_PTL_BASE_SEND_FRAG_H
#define MCA_PTL_BASE_SEND_FRAG_H

#include "ompi/mca/ptl/ptl.h"
#include "ompi/mca/ptl/base/ptl_base_fragment.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/* Class descriptor for mca_ptl_base_send_frag_t. */
OMPI_DECLSPEC extern opal_class_t mca_ptl_base_send_frag_t_class;

/**
 * Base type for send fragment descriptors
 *
 * frag_base is the first member, so a pointer to this structure can be
 * used where a pointer to the base fragment type is expected.
 */
struct mca_ptl_base_send_frag_t {
    mca_ptl_base_frag_t frag_base; /**< base fragment descriptor */
    struct mca_ptl_base_send_request_t *frag_request; /**< pointer to send request */
};
typedef struct mca_ptl_base_send_frag_t mca_ptl_base_send_frag_t;

#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

#endif

#include "ompi/mca/ptl/base/ptl_base_sendreq.h"


/* Class descriptor for the base send request: derives from
 * mca_pml_base_send_request_t and has no constructor or destructor. */
OBJ_CLASS_INSTANCE(
    mca_ptl_base_send_request_t,
    mca_pml_base_send_request_t,
    NULL,
    NULL
);
/**
 * @file
 *
 * Declaration of the base send request type used by PTLs, and a helper to
 * advance its scheduling offset.
 */
#ifndef MCA_PTL_BASE_SEND_REQUEST_H
#define MCA_PTL_BASE_SEND_REQUEST_H

#include "ompi/mca/ptl/ptl.h"
#include "ompi/mca/pml/base/pml_base_sendreq.h"

#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif

/**
 * Base type for send requests
 *
 * Extends the PML base send request (first member) with the progress
 * counters and peer information listed below.
 */
struct mca_ptl_base_send_request_t {
   mca_pml_base_send_request_t req_send; /**< PML base send request */
   size_t req_offset;                    /**< number of bytes that have been scheduled */
   size_t req_bytes_sent;                /**< number of bytes that have been sent */
   ompi_ptr_t req_peer_match;            /**< matched receive at peer */
   ompi_ptr_t req_peer_addr;             /**< peers remote buffer address */
   uint64_t req_peer_size;               /**< size of peers remote buffer */
   bool req_cached;                      /**< has this request been obtained from the ptls cache */
   volatile int32_t req_lock;            /**< lock used by the scheduler */
   struct mca_ptl_base_module_t* req_ptl;   /**< ptl allocated for first fragment */
   struct mca_ptl_base_peer_t* req_peer;    /**< peer associated w/ this ptl */
};
typedef struct mca_ptl_base_send_request_t mca_ptl_base_send_request_t;


/* Class declaration matching the OBJ_CLASS_INSTANCE for this type. */
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_ptl_base_send_request_t);


/**
 * Atomically increase the request offset.
 *
 * Adds @a offset to request->req_offset through OPAL_THREAD_ADD_SIZE_T.
 *
 * @param request (IN)  Send request.
 * @param offset (IN)   Increment.
 */

static inline void mca_ptl_base_send_request_offset(
    mca_ptl_base_send_request_t* request,
    size_t offset)
{
    OPAL_THREAD_ADD_SIZE_T((&request->req_offset), offset);
}


#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004 The Ohio State University. -# All rights reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - -include $(top_ompi_srcdir)/config/Makefile.options - -AM_CPPFLAGS = $(ptl_gm_CPPFLAGS) - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -#if OMPI_ENABLE_GM_CACHE -# additional_source = ptl_gm_regcache.c -#else -# additional_source = -#endif - -libmca_ptl_gm_la_sources = $(additional_source) \ - ptl_gm.c \ - ptl_gm.h \ - ptl_gm_component.c \ - ptl_gm_peer.h \ - ptl_gm_priv.h \ - ptl_gm_priv.c \ - ptl_gm_proc.c \ - ptl_gm_proc.h \ - ptl_gm_sendfrag.c \ - ptl_gm_sendfrag.h \ - ptl_gm_memory.c - -if OMPI_BUILD_ptl_gm_DSO -component_noinst = -component_install = mca_ptl_gm.la -else -component_noinst = libmca_ptl_gm.la -component_install = -endif - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) - -mca_ptl_gm_la_SOURCES = $(libmca_ptl_gm_la_sources) -mca_ptl_gm_la_LIBADD = \ - $(ptl_gm_LIBS) \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la -mca_ptl_gm_la_LDFLAGS = -module -avoid-version $(ptl_gm_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_ptl_gm_la_SOURCES = $(libmca_ptl_gm_la_sources) -libmca_ptl_gm_la_LIBADD = $(ptl_gm_LIBS) -libmca_ptl_gm_la_LDFLAGS = -module -avoid-version $(ptl_gm_LDFLAGS) - diff --git a/ompi/mca/ptl/gm/configure.m4 b/ompi/mca/ptl/gm/configure.m4 deleted file mode 100644 index 1d1efdbb8a..0000000000 --- 
# MCA_ptl_gm_CONFIG([action-if-can-compile],
#                   [action-if-cant-compile])
# ------------------------------------------------
# Runs OMPI_CHECK_GM with the "ptl_gm" prefix.  When the check succeeds the
# wrapper LDFLAGS/LIBS are taken from ptl_gm_LDFLAGS/ptl_gm_LIBS and the
# first argument is expanded; otherwise the second argument is expanded.
# The ptl_gm_* build variables are substituted in either case.
AC_DEFUN([MCA_ptl_gm_CONFIG],[
    # record the outcome of the GM check in ptl_gm_happy
    OMPI_CHECK_GM([ptl_gm],
                     [ptl_gm_happy="yes"],
                     [ptl_gm_happy="no"])

    # on success export the flags for the wrapper compilers, then run the
    # caller supplied action
    AS_IF([test "$ptl_gm_happy" = "yes"],
          [ptl_gm_WRAPPER_EXTRA_LDFLAGS="$ptl_gm_LDFLAGS"
           ptl_gm_WRAPPER_EXTRA_LIBS="$ptl_gm_LIBS"
           $1],
          [$2])

    # substitute in the things needed to build gm
    AC_SUBST([ptl_gm_CFLAGS])
    AC_SUBST([ptl_gm_CPPFLAGS])
    AC_SUBST([ptl_gm_LDFLAGS])
    AC_SUBST([ptl_gm_LIBS])
    # Define it for internal use.  The value is hard-wired to 0.
    AC_DEFINE_UNQUOTED(OMPI_MCA_PTL_GM_CACHE_ENABLE, 0,
                       [Whether we want the internal GM cache to be activated.])
])dnl
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=ptl_gm.c -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/ptl/gm/ptl_gm.c b/ompi/mca/ptl/gm/ptl_gm.c deleted file mode 100644 index 01c28cd340..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm.c +++ /dev/null @@ -1,414 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include "ompi/class/ompi_bitmap.h" -#include "opal/util/output.h" -#include "orte/util/proc_info.h" -#include "orte/mca/ns/ns.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" -#include "ptl_gm.h" -#include "ptl_gm_proc.h" -#include "ptl_gm_priv.h" -#include "ptl_gm_peer.h" -#include "ptl_gm_sendfrag.h" -#include "ompi/proc/proc.h" - -mca_ptl_gm_module_t mca_ptl_gm_module = { - { - &mca_ptl_gm_component.super, - 1, /* max size of request cache */ - sizeof(mca_ptl_gm_send_frag_t), /* bytes required by ptl for a request */ - 0, /* max size of first fragment */ - 0, /* min fragment size */ - 0, /* max fragment size */ - 0, /* exclusivity */ - 50, /* latency */ - 0, /* bandwidth */ - MCA_PTL_PUT, /* ptl flags */ - /* collection of interfaces */ - mca_ptl_gm_add_procs, - mca_ptl_gm_del_procs, - mca_ptl_gm_finalize, - mca_ptl_gm_peer_send, - mca_ptl_gm_put, - mca_ptl_gm_get, - mca_ptl_gm_matched, - mca_ptl_gm_request_init, - mca_ptl_gm_request_fini, - NULL, - NULL, - NULL - } -}; - -OBJ_CLASS_INSTANCE (mca_ptl_gm_send_request_t, - mca_ptl_base_send_request_t, NULL, NULL); -OBJ_CLASS_INSTANCE (mca_ptl_gm_peer_t, opal_list_item_t, NULL, NULL); - -int -mca_ptl_gm_add_procs (struct mca_ptl_base_module_t *ptl, - size_t nprocs, - struct ompi_proc_t **orte_procs, - struct mca_ptl_base_peer_t **peers, - ompi_bitmap_t * reachable) -{ - uint32_t i, j, num_peer_ptls = 1; - struct ompi_proc_t *orte_proc; - mca_ptl_gm_proc_t *ptl_proc; - mca_ptl_gm_peer_t *ptl_peer; - ompi_proc_t* local_proc = ompi_proc_local(); - - for (i = 0; i < nprocs; i++) { - orte_proc = orte_procs[i]; - if( orte_proc == local_proc ) continue; - ptl_proc = mca_ptl_gm_proc_create ((mca_ptl_gm_module_t *) ptl, orte_proc); - if (NULL == ptl_proc) { - opal_output( 0, "[%s:%d] cannot allocate memory for the GM module", __FILE__, __LINE__ ); - continue; - } - - 
OPAL_THREAD_LOCK (&ptl_proc->proc_lock); - if (ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) { - OPAL_THREAD_UNLOCK (&ptl_proc->proc_lock); - opal_output( 0, "[%s:%d] modex exchange failed for GM module", __FILE__, __LINE__ ); - continue; - } - ptl_peer = NULL; /* force it to NULL before looping through the ptls */ - /* TODO: make this extensible to multiple nics */ - for( j = 0; j < num_peer_ptls; j++ ) { - ptl_peer = OBJ_NEW (mca_ptl_gm_peer_t); - if (NULL == ptl_peer) { - OPAL_THREAD_UNLOCK (&ptl_proc->proc_lock); - opal_output( 0, "[%s:%d] cannot allocate memory for one of the GM ptl", __FILE__, __LINE__ ); - continue; - } - - ptl_peer->peer_ptl = (mca_ptl_gm_module_t *) ptl; - ptl_peer->peer_proc = ptl_proc; - ptl_peer->peer_addr.port_id = ptl_proc->proc_addrs->port_id; -#if GM_API_VERSION > 0x200 - ptl_peer->peer_addr.global_id = ptl_proc->proc_addrs->global_id; - if (GM_SUCCESS != gm_global_id_to_node_id(((mca_ptl_gm_module_t *) ptl)->gm_port, - ptl_proc->proc_addrs[j].global_id, - &(ptl_peer->peer_addr.local_id))) { - opal_output( 0, "[%s:%d] error in converting global to local id \n", - __FILE__, __LINE__ ); - OBJ_RELEASE( ptl_peer ); - assert( NULL == ptl_peer ); - continue; - } -#else - strncpy( ptl_peer->peer_addr.global_id, ptl_proc->proc_addrs->global_id, GM_MAX_HOST_NAME_LEN ); - ptl_peer->peer_addr.local_id = gm_host_name_to_node_id( ((mca_ptl_gm_module_t *) ptl)->gm_port, - ptl_proc->proc_addrs[j].global_id ); - if( GM_NO_SUCH_NODE_ID == ptl_peer->peer_addr.local_id ) { - opal_output( 0, "Unable to convert the remote host name (%s) to a host id", - ptl_proc->proc_addrs[j].global_id ); - OBJ_RELEASE( ptl_peer ); - assert( NULL == ptl_peer ); - continue; - } -#endif /* GM_API_VERSION > 0x200 */ - ptl_proc->peer_arr[ptl_proc->proc_peer_count] = ptl_peer; - ptl_proc->proc_peer_count++; - ompi_bitmap_set_bit (reachable, i); /* set the bit again and again */ - } - OPAL_THREAD_UNLOCK (&ptl_proc->proc_lock); - peers[i] = (struct 
mca_ptl_base_peer_t*)ptl_peer; - } - - return OMPI_SUCCESS; -} - -/* - * - */ -int -mca_ptl_gm_del_procs (struct mca_ptl_base_module_t *ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers) -{ - size_t i; - for (i = 0; i < nprocs; i++) { - OBJ_RELEASE (peers[i]); - } - return OMPI_SUCCESS; -} - -/* - * - */ -int -mca_ptl_gm_finalize (struct mca_ptl_base_module_t *base_ptl) -{ - uint32_t index; - mca_ptl_gm_module_t* ptl = (mca_ptl_gm_module_t*)base_ptl; - - for( index = 0; index < mca_ptl_gm_component.gm_num_ptl_modules; index++ ) { - if( mca_ptl_gm_component.gm_ptl_modules[index] == ptl ) { - mca_ptl_gm_component.gm_ptl_modules[index] = NULL; - break; - } - } - - if( index == mca_ptl_gm_component.gm_num_ptl_modules ) { - opal_output( 0, "%p is not a GM PTL !!!\n", (void*)base_ptl ); - return OMPI_ERROR; - } - - /* we should do the same things as in the init step in reverse order. - * First we shutdown all threads if there are any. - */ -#if OMPI_HAVE_POSIX_THREADS - if( 0 != ptl->thread.t_handle ) { - void* thread_return; - - pthread_cancel( ptl->thread.t_handle ); - opal_thread_join( &(ptl->thread), &thread_return ); - } -#endif /* OMPI_HAVE_POSIX_THREADS */ - - /* Closing each port require several steps. As there is no way to cancel all - * already posted messages we start by unregistering all memory and then close - * the port. After we can release all internal data. - */ - if( ptl->gm_send_dma_memory != NULL ) { - gm_dma_free( ptl->gm_port, ptl->gm_send_dma_memory ); - ptl->gm_send_dma_memory = NULL; - } - - if( ptl->gm_recv_dma_memory != NULL ) { - gm_dma_free( ptl->gm_port, ptl->gm_recv_dma_memory ); - ptl->gm_recv_dma_memory = NULL; - } - - /* Now close the port if one is open */ - if( ptl->gm_port != NULL ) { - gm_close( ptl->gm_port ); - ptl->gm_port = NULL; - } - - /* And now release all internal ressources. 
*/ - OBJ_DESTRUCT( &(ptl->gm_send_frags) ); - if( ptl->gm_send_fragments != NULL ) { - free( ptl->gm_send_fragments ); - ptl->gm_send_fragments = NULL; - } - - OBJ_DESTRUCT( &(ptl->gm_recv_frags_free) ); - if( ptl->gm_recv_fragments != NULL ) { - free( ptl->gm_recv_fragments ); - ptl->gm_recv_fragments = NULL; - } - - /* These are supposed to be empty by now */ - OBJ_DESTRUCT( &(ptl->gm_send_frags_queue) ); - OBJ_DESTRUCT( &(ptl->gm_pending_acks) ); - OBJ_DESTRUCT( &(ptl->gm_recv_outstanding_queue) ); - - /* And finally release the PTL itself */ - free( ptl ); - - return OMPI_SUCCESS; -} - -int -mca_ptl_gm_request_init( struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_send_request_t *request ) -{ - -#if 0 - mca_ptl_gm_send_frag_t *frag; - struct mca_ptl_gm_send_request_t *req; - frag = mca_ptl_gm_alloc_send_frag(ptl, request); - - if (NULL == frag) { - opal_output(0,"[%s:%d] Unable to allocate a gm send fragment\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } else { - req = (mca_ptl_gm_send_request_t *)request; - req->req_frag = frag; - frag->status = 0; /*MCA_PTL_GM_FRAG_CACHED;*/ - } - return OMPI_SUCCESS; -#endif - - return OMPI_SUCCESS; -} - -/* - * - */ -void -mca_ptl_gm_request_fini (struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_send_request_t *request) -{ - -#if 0 - mca_ptl_gm_send_frag_t *frag; - frag = ((mca_ptl_gm_send_request_t *)request)->req_frag; - OMPI_FREE_LIST_RETURN(&(((mca_ptl_gm_module_t *)ptl)->gm_send_frags), - (opal_list_item_t *)frag); - frag->status = 0; -#endif - - OBJ_DESTRUCT(request+1); -} - -/* - * Initiate a put - */ - -int -mca_ptl_gm_put (struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_peer_t *ptl_peer, - struct mca_ptl_base_send_request_t *sendreq, - size_t offset, size_t size, int flags) -{ - int rc; - mca_ptl_gm_send_frag_t *putfrag; - - rc = mca_ptl_gm_put_frag_init( &putfrag, - (mca_ptl_gm_peer_t*)ptl_peer, (mca_ptl_gm_module_t*)ptl, - sendreq, offset, &size, flags ); - - rc = 
mca_ptl_gm_peer_send_continue( (mca_ptl_gm_peer_t *)ptl_peer, putfrag, - sendreq, offset, &size, flags ); - return OMPI_SUCCESS; -} - -/* - * initiate a get. - */ - -int -mca_ptl_gm_get (struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_peer_t *ptl_base_peer, - struct mca_ptl_base_recv_request_t *request, - size_t offset, size_t size, int flags) -{ - return OMPI_SUCCESS; -} - -static void mca_ptl_gm_basic_ack_callback( struct gm_port* port, void* context, gm_status_t status ) -{ - mca_ptl_gm_module_t* gm_ptl; - mca_ptl_base_header_t* header; - - header = (mca_ptl_base_header_t*)context; - - gm_ptl = (mca_ptl_gm_module_t*)header->hdr_ack.hdr_dst_addr.pval; - - OMPI_GM_FREE_LIST_RETURN( &(gm_ptl->gm_send_dma_frags), ((opal_list_item_t*)header) ); - /* release the send token */ - opal_atomic_add( &(gm_ptl->num_send_tokens), 1 ); -} - -/* A posted receive has been matched - if required send an - * ack back to the peer and process the fragment. - */ -void -mca_ptl_gm_matched( mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag ) -{ - mca_ptl_base_recv_request_t *request; - mca_ptl_base_header_t *hdr; - int32_t rc; - mca_ptl_gm_module_t *gm_ptl; - mca_ptl_gm_recv_frag_t *recv_frag; - mca_ptl_gm_peer_t* peer; - struct iovec iov = { NULL, 0 }; - - gm_ptl = (mca_ptl_gm_module_t *)ptl; - request = frag->frag_request; - recv_frag = (mca_ptl_gm_recv_frag_t *)frag; - peer = (mca_ptl_gm_peer_t*)recv_frag->frag_recv.frag_base.frag_peer; - - if( frag->frag_base.frag_header.hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK ) { /* need to send an ack back */ - opal_list_item_t *item; - - OMPI_FREE_LIST_WAIT( &(gm_ptl->gm_send_dma_frags), item, rc ); - if( NULL == item ) { - opal_output(0,"[%s:%d] unable to alloc a gm fragment\n", __FILE__,__LINE__); - OPAL_THREAD_LOCK (&mca_ptl_gm_component.gm_lock); - opal_list_append (&mca_ptl_gm_module.gm_pending_acks, (opal_list_item_t *)frag); - OPAL_THREAD_UNLOCK (&mca_ptl_gm_component.gm_lock); - } else { - opal_atomic_sub( 
&(gm_ptl->num_send_tokens), 1 ); - assert( gm_ptl->num_send_tokens >= 0 ); - hdr = (mca_ptl_base_header_t*)item; - - hdr->hdr_ack.hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK; - hdr->hdr_ack.hdr_common.hdr_flags = frag->frag_base.frag_header.hdr_common.hdr_flags; - hdr->hdr_ack.hdr_src_ptr = frag->frag_base.frag_header.hdr_rndv.hdr_src_ptr; - hdr->hdr_ack.hdr_dst_match.lval = 0L; - hdr->hdr_ack.hdr_dst_match.pval = request; - hdr->hdr_ack.hdr_dst_addr.lval = 0L; - hdr->hdr_ack.hdr_dst_addr.pval = ptl; /* local use */ - hdr->hdr_ack.hdr_dst_size = request->req_recv.req_bytes_packed; - - gm_send_with_callback( ((mca_ptl_gm_module_t*)ptl)->gm_port, hdr, - GM_SIZE, sizeof(mca_ptl_base_ack_header_t), - GM_LOW_PRIORITY, - peer->peer_addr.local_id, - peer->peer_addr.port_id, - mca_ptl_gm_basic_ack_callback, - (void *)hdr ); - } - } - - if( frag->frag_base.frag_size > 0 ) { - ompi_convertor_t* convertor; - uint32_t out_size; - int32_t freeAfter; - size_t max_data; - - iov.iov_len = recv_frag->attached_data_length; - /* Here we expect that frag_addr is the begin of the buffer header included */ - iov.iov_base = frag->frag_base.frag_addr; - - convertor = &(request->req_recv.req_convertor); - - out_size = 1; - max_data = iov.iov_len; - rc = ompi_convertor_unpack( convertor, &(iov), &out_size, &max_data, &freeAfter ); - assert( rc >= 0 ); - recv_frag->frag_bytes_processed += max_data; - } - - /* update progress*/ - ptl->ptl_recv_progress( ptl, request, iov.iov_len, iov.iov_len ); - - /* Now update the status of the fragment */ - if( ((mca_ptl_gm_recv_frag_t*)frag)->have_allocated_buffer == true ) { - mca_ptl_gm_release_local_buffer( ((mca_ptl_gm_recv_frag_t*)frag)->frag_recv.frag_base.frag_addr ); - ((mca_ptl_gm_recv_frag_t*)frag)->have_allocated_buffer = false; - } - - /* I'm done with this fragment. 
Return it to the free list */ - OMPI_FREE_LIST_RETURN( &(gm_ptl->gm_recv_frags_free), (opal_list_item_t*)frag ); -} - diff --git a/ompi/mca/ptl/gm/ptl_gm.h b/ompi/mca/ptl/gm/ptl_gm.h deleted file mode 100644 index 5f0ec6f57f..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm.h +++ /dev/null @@ -1,251 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_GM_H -#define MCA_PTL_GM_H - -#include "ompi_config.h" -#include "ompi/class/ompi_free_list.h" -#include "ompi/mca/ptl/ptl.h" -#include "ptl_gm_priv.h" -#include "ptl_gm_peer.h" - -#define MCA_PTL_GM_STATISTICS 0 -#define MAX_RECV_TOKENS 256 -#define PTL_GM_ADMIN_SEND_TOKENS 0 -#define PTL_GM_ADMIN_RECV_TOKENS 0 -#define GM_SIZE 30 -#define NUM_RECV_FRAGS 256 -#define MCA_PTL_GM_FRAG_CACHED -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - /** - * GM PTL component - */ - struct mca_ptl_gm_component_t { - mca_ptl_base_component_1_0_0_t super; /**< base PTL module */ - struct mca_ptl_gm_module_t **gm_ptl_modules; /**< array of available PTL modules */ - size_t gm_num_ptl_modules; /**< number of ptls actually used */ - size_t gm_max_ptl_modules; /**< maximum number of ptls - available */ - uint32_t gm_max_port_number; /**< maximum number of ports by board */ - uint32_t gm_max_boards_number; /**< maximum number of 
boards on the node */ - uint32_t gm_free_list_num; /**< initial size of free lists */ - uint32_t gm_free_list_max; /**< maximum size of free lists */ - uint32_t gm_free_list_inc; /**< number of elements to alloc when growing free lists */ - uint32_t gm_segment_size; /**< size of the allocated segment */ - uint32_t gm_eager_limit; /**< number of bytes before the rendez-vous protocol. If the - **< size of the message is less than this number then GM - **< use a eager protocol. */ -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET - uint32_t gm_rndv_burst_limit; - uint32_t gm_rdma_frag_size; /**< maximum fragment size used to transfer data over RDMA */ -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - char* gm_port_name; /**< the name used to get the port */ - - struct mca_ptl_gm_proc_t* gm_local; - opal_list_t gm_procs; - opal_list_t gm_send_req; - ompi_free_list_t gm_unexpected_frags_data; - - opal_mutex_t gm_lock; /**< lock for accessing module state */ - }; - - typedef struct mca_ptl_gm_component_t mca_ptl_gm_component_t; - extern mca_ptl_gm_component_t mca_ptl_gm_component; - - /** - * GM PTL Interface - */ - struct mca_ptl_gm_module_t { - mca_ptl_base_module_t super; /**< base PTL module interface */ - struct gm_port *gm_port; - mca_ptl_gm_addr_t local_addr; - unsigned int num_send_tokens; - unsigned int num_recv_tokens; - unsigned int max_send_tokens; - unsigned int max_recv_tokens; - void* gm_send_dma_memory; /**< pointer to the send DMA registered memory attached to the PTL */ - void* gm_recv_dma_memory; /**< pointer to the recv DMA registered memory attached to the PTL */ - struct mca_ptl_gm_send_frag_t* gm_send_fragments; - struct mca_ptl_gm_recv_frag_t* gm_recv_fragments; - - ompi_free_list_t gm_send_frags; - ompi_free_list_t gm_send_dma_frags; - ompi_free_list_t gm_recv_frags_free; - opal_list_t gm_send_frags_queue; - opal_list_t gm_pending_acks; - opal_list_t gm_recv_outstanding_queue; - - opal_thread_t thread; -#if MCA_PTL_GM_STATISTICS - size_t ptl_bytes_sent; - size_t 
ptl_bytes_recv; -#endif /* MCA_PTL_GM_STATISTICS */ - }; - - typedef struct mca_ptl_gm_module_t mca_ptl_gm_module_t; - extern mca_ptl_gm_module_t mca_ptl_gm_module; - - /** - * Register GM module parameters with the MCA framework - */ - extern int mca_ptl_gm_component_open (void); - - /** - * Any final cleanup before being unloaded. - */ - extern int mca_ptl_gm_component_close (void); - - /** - * GM module initialization. - * - * @param num_ptls (OUT) Number of PTLs returned in PTL array. - * @param allow_multi_user_threads (OUT) Flag indicating wether PTL supports user threads (TRUE) - * @param have_hidden_threads (OUT) Flag indicating wether PTL uses threads (TRUE) - */ - extern mca_ptl_base_module_t **mca_ptl_gm_component_init (int *num_ptl_modules, - bool enable_progress_threads, - bool enable_mpi_threads); - - /** - * GM module control. - */ - extern int mca_ptl_gm_component_control (int param, void *value, size_t size); - - /** - * GM module progress. - */ - extern int mca_ptl_gm_component_progress (mca_ptl_tstamp_t tstamp); - - /** - * GM put - */ - extern int mca_ptl_gm_put( struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_peer_t *ptl_peer, - struct mca_ptl_base_send_request_t *sendreq, - size_t offset, size_t size, int flags); - - /** - * GM get - */ - extern int mca_ptl_gm_get (struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_peer_t *ptl_peer, - struct mca_ptl_base_recv_request_t *sendreq, - size_t offset, size_t size, int flags); - - /** - * PML->PTL notification of change in the process list. - * - * @param ptl (IN) - * @param nprocs (IN) Number of processes - * @param procs (IN) Set of processes - * @param peers (OUT) Set of (optional) peer addressing info. - * @param peers (IN/OUT) Set of processes that are reachable via this PTL. - * @return OMPI_SUCCESS or error status on failure. 
- * - */ - extern int mca_ptl_gm_add_procs (struct mca_ptl_base_module_t *ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers, - struct ompi_bitmap_t * reachable); - - /** - * PML->PTL notification of change in the process list. - * - * @param ptl (IN) PTL instance - * @param nproc (IN) Number of processes. - * @param procs (IN) Set of processes. - * @param peers (IN) Set of peer data structures. - * @return Status indicating if cleanup was successful - * - */ - extern int mca_ptl_gm_del_procs( struct mca_ptl_base_module_t *ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers ); - - /** - * PML->PTL Allocate a send request from the PTL modules free list. - * - * @param ptl (IN) PTL instance - * @param request (OUT) Pointer to allocated request. - * @return Status indicating if allocation was successful. - * - */ - extern int mca_ptl_gm_request_init( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* req); - - /** - * - */ - extern void mca_ptl_gm_request_fini( struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_send_request_t* req); - - /** - * PML->PTL Notification that a receive fragment has been matched. - * - * @param ptl (IN) PTL instance - * @param recv_frag (IN) Receive fragment - * - */ - extern void mca_ptl_gm_matched (struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_recv_frag_t *frag); - - /** - * - */ - extern int mca_ptl_gm_finalize (struct mca_ptl_base_module_t *ptl); - - /** - * Internally allocate memory for the unexpected messages. We will manage a list - * of such buffers in order to avoid too many memory allocations. 
- */ - extern char* mca_ptl_gm_get_local_buffer( void ); - extern void mca_ptl_gm_release_local_buffer( char* ptr ); - - union mca_ptl_base_header_t; - void mca_ptl_gm_dump_header( char* str, union mca_ptl_base_header_t* hdr ); - -#if OMPI_ENABLE_DEBUG -#include "opal/class/opal_list.h" -/* If debug is enabled we have to work around the item validity checks. */ -#define OMPI_GM_FREE_LIST_RETURN( LIST, ITEM ) \ -do { \ - (ITEM)->opal_list_item_refcount = 0; \ - (ITEM)->opal_list_item_belong_to = NULL; \ - (ITEM)->super.cls_init_file_name = __FILE__; \ - (ITEM)->super.cls_init_lineno = __LINE__; \ - OMPI_FREE_LIST_RETURN( (LIST), (ITEM) ); \ -} while(0) -#else -#define OMPI_GM_FREE_LIST_RETURN OMPI_FREE_LIST_RETURN -#endif /* OMPI_ENABLE_DEBUG */ - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/ompi/mca/ptl/gm/ptl_gm_component.c b/ompi/mca/ptl/gm/ptl_gm_component.c deleted file mode 100644 index 613d873681..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_component.c +++ /dev/null @@ -1,600 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "ompi/constants.h" -#include "opal/util/output.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_module_exchange.h" -#include "ptl_gm.h" -#include "ptl_gm_priv.h" -#include "ptl_gm_peer.h" -#include "ptl_gm_sendfrag.h" - -mca_ptl_gm_component_t mca_ptl_gm_component = { - { - /* First, the mca_base_component_t struct containing meta information - about the component itself */ - { - /* Indicate that we are a pml v1.0.0 component (which also implies a - specific MCA version) */ - MCA_PTL_BASE_VERSION_1_0_0, - "gm", /* MCA component name */ - OMPI_MAJOR_VERSION, /* MCA component major version */ - OMPI_MINOR_VERSION, /* MCA component minor version */ - OMPI_RELEASE_VERSION, /* MCA component release version */ - mca_ptl_gm_component_open, /* component open */ - mca_ptl_gm_component_close /* component close */ - } - , - /* Next the MCA v1.0.0 component meta data */ - { - /* Whether the component is checkpointable or not */ - false - }, - mca_ptl_gm_component_init, - mca_ptl_gm_component_control, - mca_ptl_gm_component_progress - } -}; - -/* - * utility routines for parameter registration - */ - -static inline char * -mca_ptl_gm_param_register_string( const char *param_name, - const char *default_value ) -{ - char *param_value; - int id = mca_base_param_register_string( "ptl", "gm", param_name, NULL, - default_value) ; - mca_base_param_lookup_string (id, &param_value); - return param_value; -} - -static inline int -mca_ptl_gm_param_register_int( const char *param_name, int default_value ) -{ - int id = - mca_base_param_register_int ("ptl", "gm", param_name, NULL, - default_value); - int param_value = default_value; - mca_base_param_lookup_int (id, &param_value); - return param_value; -} - -/* - * Called by MCA framework to open the module, registers - * module parameters. 
- */ - -int -mca_ptl_gm_component_open(void) -{ - uint32_t default_first_frag_size; - - /* initialize state */ - mca_ptl_gm_component.gm_ptl_modules = NULL; - mca_ptl_gm_component.gm_num_ptl_modules = 0; - - /* initialize objects */ - OBJ_CONSTRUCT (&mca_ptl_gm_component.gm_lock, opal_mutex_t); - OBJ_CONSTRUCT (&mca_ptl_gm_component.gm_procs, opal_list_t); - OBJ_CONSTRUCT (&mca_ptl_gm_component.gm_send_req, opal_list_t); - - /* register GM component parameters */ - mca_ptl_gm_component.gm_port_name = - mca_ptl_gm_param_register_string( "port_name", "OMPI_GM" ); - mca_ptl_gm_component.gm_max_port_number = - mca_ptl_gm_param_register_int ("max_ports_number", 16 ); - mca_ptl_gm_component.gm_max_boards_number = - mca_ptl_gm_param_register_int ("max_boards_number", 4 ); - mca_ptl_gm_component.gm_max_ptl_modules = - mca_ptl_gm_param_register_int( "max_ptl_modules", 1 ); - - mca_ptl_gm_component.gm_segment_size = - mca_ptl_gm_param_register_int( "segment_size", 32 * 1024 ); - default_first_frag_size = mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_rendezvous_header_t); - - mca_ptl_gm_module.super.ptl_first_frag_size = - mca_ptl_gm_param_register_int ("first_frag_size", default_first_frag_size ); - /* the first_frag_size should be always less than the gm_segment_size by at least the - * header sizeof. - */ - if( mca_ptl_gm_module.super.ptl_first_frag_size > default_first_frag_size ) { - mca_ptl_gm_module.super.ptl_first_frag_size = default_first_frag_size; - } - - mca_ptl_gm_module.super.ptl_min_frag_size = - mca_ptl_gm_param_register_int ("min_frag_size", 64 * 1024); - mca_ptl_gm_module.super.ptl_max_frag_size = - mca_ptl_gm_param_register_int ("max_frag_size", 256 * 1024 * 1024); - /* Parameters setting the message limits. 
*/ - mca_ptl_gm_component.gm_eager_limit = - mca_ptl_gm_param_register_int( "eager_limit", 128 * 1024 ); -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET - mca_ptl_gm_component.gm_rndv_burst_limit = - mca_ptl_gm_param_register_int( "rndv_burst_limit", 512 * 1024 ); - mca_ptl_gm_component.gm_rdma_frag_size = - mca_ptl_gm_param_register_int ("rdma_frag_size", 128 * 1024); -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - - mca_ptl_gm_component.gm_free_list_num = - mca_ptl_gm_param_register_int ("free_list_num", 256); - mca_ptl_gm_component.gm_free_list_inc = - mca_ptl_gm_param_register_int ("free_list_inc", 32); - - return OMPI_SUCCESS; -} - -/* - * component close - */ -int mca_ptl_gm_component_close (void) -{ - uint32_t index; - mca_ptl_base_module_t* ptl; - - for( index = 0; index < mca_ptl_gm_component.gm_num_ptl_modules; index++ ) { - ptl = (mca_ptl_base_module_t*)mca_ptl_gm_component.gm_ptl_modules[index]; - if( NULL != ptl ) - ptl->ptl_finalize( ptl ); - } - mca_ptl_gm_component.gm_num_ptl_modules = 0; - - if (NULL != mca_ptl_gm_component.gm_ptl_modules) - free (mca_ptl_gm_component.gm_ptl_modules); - - OBJ_DESTRUCT( &mca_ptl_gm_component.gm_procs ); - OBJ_DESTRUCT( &mca_ptl_gm_component.gm_send_req ); - OBJ_DESTRUCT( &mca_ptl_gm_component.gm_lock ); - - return OMPI_SUCCESS; -} - -/* - * Create a ptl instance and add to components list. - */ - -static int -mca_ptl_gm_create( mca_ptl_gm_module_t** pptl ) -{ - mca_ptl_gm_module_t *ptl; - - ptl = (mca_ptl_gm_module_t *)malloc( sizeof(mca_ptl_gm_module_t) ); - if (NULL == ptl) { - opal_output( 0, " ran out of resource to allocate ptl_instance \n" ); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* copy the basic informations in the new PTL */ - memcpy (ptl, &mca_ptl_gm_module, sizeof(mca_ptl_gm_module_t) ); -#if OMPI_HAVE_POSIX_THREADS - ptl->thread.t_handle = (pthread_t)-1; -#endif /* OMPI_HAVE_POSIX_THREADS */ - *pptl = ptl; - - return OMPI_SUCCESS; -} - -/* - * Register GM component addressing information. 
The MCA framework - * will make this available to all peers. - */ -static int -mca_ptl_gm_module_store_data_toexchange (void) -{ - int rc; - size_t i; - size_t size; - mca_ptl_gm_addr_t *addrs; - - size = mca_ptl_gm_component.gm_num_ptl_modules * sizeof (mca_ptl_gm_addr_t); - addrs = (mca_ptl_gm_addr_t *)malloc (size); - - if (NULL == addrs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for (i = 0; i < mca_ptl_gm_component.gm_num_ptl_modules; i++) { - mca_ptl_gm_module_t *ptl = mca_ptl_gm_component.gm_ptl_modules[i]; - addrs[i].local_id = ptl->local_addr.local_id; -#if GM_API_VERSION > 0x200 - addrs[i].global_id = ptl->local_addr.global_id; -#else - strncpy( addrs[i].global_id, ptl->local_addr.global_id, GM_MAX_HOST_NAME_LEN ); -#endif /* GM_API_VERSION > 0x200 */ - addrs[i].port_id = ptl->local_addr.port_id; - } - rc = mca_pml_base_modex_send (&mca_ptl_gm_component.super.ptlm_version, addrs, size); - free (addrs); - return rc; -} - -#if OMPI_HAVE_POSIX_THREADS -static void* -mca_ptl_gm_thread_progress( opal_thread_t* thread ) -{ - gm_recv_event_t *event; - mca_ptl_gm_module_t* ptl = thread->t_arg; - - /* This thread enter in a cancel enabled state */ - pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, NULL ); - pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, NULL ); - - while(1) { - event = gm_blocking_receive(ptl->gm_port); - if( GM_NO_RECV_EVENT != gm_ntohc(event->recv.type) ) - mca_ptl_gm_analyze_recv_event( ptl, event ); - } - return PTHREAD_CANCELED; -} -#endif /* OMPI_HAVE_POSIX_THREADS */ - - -/* Scan all ports on the boards. As it's difficult to find the total number of boards - * we use a predefined maximum. - * Return the number of discovered boards where opening a port was a succesfull operation. 
- */ -static int32_t -mca_ptl_gm_discover_boards( mca_ptl_gm_module_t** pptl, - uint32_t max_ptls, uint32_t max_boards, uint32_t max_port ) -{ - uint32_t board_no, port_no, index = 0, local_id; - struct gm_port* gm_port; -#if GM_API_VERSION > 0x200 - uint32_t global_id; -#else - char global_id[GM_MAX_HOST_NAME_LEN]; -#endif /* GM_API_VERSION > 0x200 */ - - for( board_no = 0; board_no < max_boards; board_no++ ) { - - /* open the first available gm port for this board */ - for( port_no = 2; port_no < max_port; port_no++ ) { - if (3 == port_no) { - continue; /* port 0,1,3 reserved */ - } else if (GM_SUCCESS == - gm_open(&gm_port, board_no, port_no, - mca_ptl_gm_component.gm_port_name, - OMPI_MCA_PTL_GM_API_VERSION) ) { - break; - } - } - if( port_no == max_port ) { - continue; - } - - /* Get node local Id */ - if( GM_SUCCESS != gm_get_node_id( gm_port, &local_id) ) { - opal_output (0, " failure to get local_id \n"); - continue; - } - /* Gather an unique id for the node */ -#if GM_API_VERSION > 0x200 - if (GM_SUCCESS != gm_node_id_to_global_id( gm_port, local_id, &global_id) ) { - opal_output (0, " Error: Unable to get my GM global unique id \n"); - continue; - } -#else - { - if( GM_SUCCESS != gm_get_host_name( gm_port, global_id ) ) { - opal_output( 0, "Error: Unable to get the GM host name\n" ); - continue; - } - } -#endif /* GM_API_VERSION > 0x200 */ - - /* Create the ptl. 
If fail return the number of already created */ - if( OMPI_SUCCESS != mca_ptl_gm_create( &(pptl[index]) ) ) { - return index; - } - - pptl[index]->gm_port = gm_port; - pptl[index]->local_addr.port_id = port_no; - pptl[index]->local_addr.local_id = local_id; -#if GM_API_VERSION > 0x200 - pptl[index]->local_addr.global_id = global_id; -#else - strncpy( pptl[index]->local_addr.global_id, global_id, GM_MAX_HOST_NAME_LEN ); -#endif /* GM_API_VERSION > 0x200 */ - - /* everything is OK let's mark it as usable and go to the next one */ - if( (++index) >= max_ptls ) { - break; - } - } - - return index; -} - -static int -mca_ptl_gm_init_sendrecv (mca_ptl_gm_module_t * ptl) -{ - uint32_t i; - mca_ptl_gm_send_frag_t *sfragment; - mca_ptl_gm_recv_frag_t *free_rfragment; - - ptl->num_send_tokens = gm_num_send_tokens (ptl->gm_port); - ptl->max_send_tokens = ptl->num_send_tokens; - ptl->num_send_tokens -= PTL_GM_ADMIN_SEND_TOKENS; - ptl->num_recv_tokens = gm_num_receive_tokens (ptl->gm_port); - ptl->max_recv_tokens = ptl->num_recv_tokens; - ptl->num_recv_tokens -= PTL_GM_ADMIN_RECV_TOKENS; - - /****************SEND****************************/ - /* construct a list of send fragments */ - OBJ_CONSTRUCT (&(ptl->gm_send_frags), ompi_free_list_t); - OBJ_CONSTRUCT (&(ptl->gm_send_dma_frags), ompi_free_list_t); - OBJ_CONSTRUCT (&(ptl->gm_send_frags_queue), opal_list_t); - - /* We need a free list just to handle the send fragment that we provide. - * Just to make sure that we dont waste memory, we dont allow this list to - * grow anymore. - */ - ompi_free_list_init( &(ptl->gm_send_frags), - sizeof (mca_ptl_gm_send_frag_t), - OBJ_CLASS (mca_ptl_gm_send_frag_t), - 0, /* do not allocate any items I'll provide them */ - 0, /* maximum number of list allocated elements will be zero */ - 0, - NULL ); /* not using mpool */ - /* A free list containing all DMA allocate memory. 
- * This free list does not have the right to allocate any new item - * as they should be allocated with a special GM function. - */ - ompi_free_list_init( &(ptl->gm_send_dma_frags), - mca_ptl_gm_component.gm_segment_size, - OBJ_CLASS (opal_list_item_t), - 0, /* do not allocate any items I'll provide them */ - 0, /* maximum number of list allocated elements will be zero */ - 0, - NULL ); /* not using mpool */ - - /* allocate the elements */ - sfragment = (mca_ptl_gm_send_frag_t *)calloc( ptl->num_send_tokens, sizeof(mca_ptl_gm_send_frag_t) ); - ptl->gm_send_fragments = sfragment; - /* allocate the registered memory */ - ptl->gm_send_dma_memory = gm_dma_malloc( ptl->gm_port, - (mca_ptl_gm_component.gm_segment_size * ptl->num_send_tokens) + GM_PAGE_LEN ); - if( NULL == ptl->gm_send_dma_memory ) { - opal_output( 0, "unable to allocate registered memory\n" ); - return OMPI_ERR_OUT_OF_RESOURCE; - } - for (i = 0; i < ptl->num_send_tokens; i++) { - sfragment->send_buf = NULL; - OMPI_GM_FREE_LIST_RETURN( &(ptl->gm_send_frags), (opal_list_item_t*)sfragment ); - OMPI_GM_FREE_LIST_RETURN( &(ptl->gm_send_dma_frags), - (opal_list_item_t*)((char*)ptl->gm_send_dma_memory + - i * mca_ptl_gm_component.gm_segment_size) ); - sfragment++; - } - - /*****************RECEIVE*****************************/ - /* allow remote memory access */ - if( GM_SUCCESS != gm_allow_remote_memory_access (ptl->gm_port) ) { - opal_output (0, "unable to allow remote memory access\n"); - } - - OBJ_CONSTRUCT (&(ptl->gm_recv_outstanding_queue), opal_list_t); - - /* construct the list of recv fragments free */ - OBJ_CONSTRUCT (&(ptl->gm_recv_frags_free), ompi_free_list_t); - ompi_free_list_init( &(ptl->gm_recv_frags_free), - sizeof (mca_ptl_gm_recv_frag_t), - OBJ_CLASS (mca_ptl_gm_recv_frag_t), - 0, /* by default I will provide all items */ - ptl->num_recv_tokens * 10, /* the maximum number of items in the free list */ - ptl->num_recv_tokens, /* if it need to allocate some more */ - NULL ); - - /* allocate the 
elements */ - free_rfragment = (mca_ptl_gm_recv_frag_t *) - calloc( ptl->num_recv_tokens, sizeof(mca_ptl_gm_recv_frag_t) ); - ptl->gm_recv_fragments = free_rfragment; - - /*allocate the registered memory */ - ptl->gm_recv_dma_memory = - gm_dma_malloc( ptl->gm_port, (mca_ptl_gm_component.gm_segment_size * ptl->num_recv_tokens) + GM_PAGE_LEN ); - if( NULL == ptl->gm_recv_dma_memory ) { - opal_output( 0, "unable to allocate registered memory for receive\n" ); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - for( i = 0; i < 2; i++ ) { - OMPI_GM_FREE_LIST_RETURN( &(ptl->gm_recv_frags_free), (opal_list_item_t *)free_rfragment ); - free_rfragment++; - - gm_provide_receive_buffer( ptl->gm_port, (char*)ptl->gm_recv_dma_memory + i * mca_ptl_gm_component.gm_segment_size, - GM_SIZE, GM_HIGH_PRIORITY ); - } - for( i = 2; i < ptl->num_recv_tokens; i++ ) { - OMPI_GM_FREE_LIST_RETURN( &(ptl->gm_recv_frags_free), (opal_list_item_t *)free_rfragment ); - free_rfragment++; - - gm_provide_receive_buffer( ptl->gm_port, (char*)ptl->gm_recv_dma_memory + i * mca_ptl_gm_component.gm_segment_size, - GM_SIZE, GM_LOW_PRIORITY ); - } - - OBJ_CONSTRUCT( &(ptl->gm_pending_acks), opal_list_t ); - - return OMPI_SUCCESS; -} - -static int -mca_ptl_gm_init( mca_ptl_gm_component_t * gm ) -{ - uint32_t index; - mca_ptl_gm_module_t* ptl; - uint32_t save_counter; - - /* let's try to find if GM is available */ - if( GM_SUCCESS != gm_init() ) { - opal_output( 0, "[%s:%d] error in initializing the gm library\n", __FILE__, __LINE__ ); - return OMPI_ERR_OUT_OF_RESOURCE; - } - /* First discover all available boards. 
For each board we will create a unique PTL */ - mca_ptl_gm_component.gm_ptl_modules = calloc( mca_ptl_gm_component.gm_max_ptl_modules, - sizeof (mca_ptl_gm_module_t *)); - if (NULL == mca_ptl_gm_component.gm_ptl_modules) { - opal_output( 0, "[%s:%d] error in initializing the gm PTL's.\n", __FILE__, __LINE__ ); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - mca_ptl_gm_component.gm_num_ptl_modules = - mca_ptl_gm_discover_boards( mca_ptl_gm_component.gm_ptl_modules, - mca_ptl_gm_component.gm_max_ptl_modules, - mca_ptl_gm_component.gm_max_boards_number, - mca_ptl_gm_component.gm_max_port_number ); - - /* In the case when we are in a multi-threaded environment each - * PTL will have its own thread. At this point all structures are - * correctly initialized, each thread will grab one and use it. - */ - for( index = 0; index < mca_ptl_gm_component.gm_num_ptl_modules; index++ ) { - ptl = mca_ptl_gm_component.gm_ptl_modules[index]; - /* Now prepost some received and allocate some sends. After - * this step the PTL is fully initialized. - */ - if( OMPI_SUCCESS != mca_ptl_gm_init_sendrecv( ptl ) ) - break; - if( opal_using_threads() ) { -#if OMPI_HAVE_POSIX_THREADS - ptl->thread.t_run = (opal_thread_fn_t)mca_ptl_gm_thread_progress; - ptl->thread.t_arg = (void*)ptl; -#endif /* OMPI_HAVE_POSIX_THREADS */ - if( OMPI_SUCCESS != opal_thread_start( &(ptl->thread) ) ) { - break; - } - } - } - save_counter = index; - /* If we are unable to start all the required threads we update the total - * number of threads and call finalize for the others PTLs. - */ - for( ; index < mca_ptl_gm_component.gm_num_ptl_modules; index++ ) { - mca_ptl_base_module_t* ptl = (mca_ptl_base_module_t*)mca_ptl_gm_component.gm_ptl_modules[index]; - ptl->ptl_finalize( ptl ); - } - mca_ptl_gm_component.gm_num_ptl_modules = save_counter; - - /* A free list containing all memory used for keep data for unexpected requests. 
*/ - OBJ_CONSTRUCT( &(mca_ptl_gm_component.gm_unexpected_frags_data), ompi_free_list_t ); - ompi_free_list_init( &(mca_ptl_gm_component.gm_unexpected_frags_data), - mca_ptl_gm_component.gm_segment_size, - OBJ_CLASS (opal_list_item_t), - 16, /* keep is small in the begining */ - 128, /* maximum number of list elements */ - 16, /* Number of elements to grow by per allocation */ - NULL ); /* not using mpool */ -#if OMPI_MCA_PTL_GM_CACHE_ENABLE - mca_ptl_gm_regcache_init(); -#endif /* OMPI_MCA_PTL_GM_CACHE_ENABLE */ - return (mca_ptl_gm_component.gm_num_ptl_modules > 0 ? OMPI_SUCCESS : OMPI_ERR_OUT_OF_RESOURCE); -} - -/* - * Initialize the GM component, - * check how many boards are available and open ports on them. - */ - -mca_ptl_base_module_t ** -mca_ptl_gm_component_init (int *num_ptl_modules, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - mca_ptl_base_module_t **ptls; - - *num_ptl_modules = 0; - - if (OMPI_SUCCESS != mca_ptl_gm_init (&mca_ptl_gm_component)) { - /*opal_output( 0, "[%s:%d] error in initializing gm state and PTL's. 
(%d PTL's)\n", - __FILE__, __LINE__, mca_ptl_gm_component.gm_num_ptl_modules );*/ - return NULL; - } - - /* publish GM parameters with the MCA framework */ - if (OMPI_SUCCESS != mca_ptl_gm_module_store_data_toexchange ()) - return 0; - - /* return array of PTLs */ - ptls = (mca_ptl_base_module_t**) malloc ( - mca_ptl_gm_component.gm_num_ptl_modules * sizeof(mca_ptl_base_module_t *)); - if (NULL == ptls) { - return NULL; - } - - memcpy (ptls, mca_ptl_gm_component.gm_ptl_modules, - mca_ptl_gm_component.gm_num_ptl_modules * sizeof(mca_ptl_gm_module_t *)); - *num_ptl_modules = mca_ptl_gm_component.gm_num_ptl_modules; - return ptls; -} - -/* - * GM module control - */ - -int -mca_ptl_gm_component_control (int param, void *value, size_t size) -{ - return OMPI_SUCCESS; -} - -char* mca_ptl_gm_get_local_buffer( void ) -{ - opal_list_item_t* item; - int rc; - - OMPI_FREE_LIST_WAIT( &(mca_ptl_gm_component.gm_unexpected_frags_data), item, rc ); - return (char*)item; -} - -void mca_ptl_gm_release_local_buffer( char* ptr ) -{ - OMPI_GM_FREE_LIST_RETURN( &(mca_ptl_gm_component.gm_unexpected_frags_data), (opal_list_item_t*)ptr ); -} - -/* - * GM module progress. 
- */ - -int -mca_ptl_gm_component_progress (mca_ptl_tstamp_t tstamp) -{ - uint32_t i; - gm_recv_event_t *event; - mca_ptl_gm_module_t *ptl; - - for( i = 0; i < mca_ptl_gm_component.gm_num_ptl_modules;) { - ptl = mca_ptl_gm_component.gm_ptl_modules[i]; - event = gm_receive(ptl->gm_port); - /* If there are no receive events just skip the function call */ - if( GM_NO_RECV_EVENT != gm_ntohc(event->recv.type) ) { - if( 1 == mca_ptl_gm_analyze_recv_event( ptl, event ) ) { - /* we try to empty the GM event queue */ - continue; - } - } - i++; - } - return OMPI_SUCCESS; -} diff --git a/ompi/mca/ptl/gm/ptl_gm_memory.c b/ompi/mca/ptl/gm/ptl_gm_memory.c deleted file mode 100644 index 6395bc7b20..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_memory.c +++ /dev/null @@ -1,52 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" - -#include "ptl_gm.h" -#include "ptl_gm_priv.h" - -gm_status_t mca_ptl_gm_register_memory(struct gm_port *port, void *ptr, unsigned len) -{ -#if OMPI_MCA_PTL_GM_SUPPORT_REGISTERING -#if OMPI_MCA_PTL_GM_CACHE_ENABLE - gmpi_use_interval( port, (gm_up_t)ptr, len ); - return GM_SUCCESS; -#else - return gm_register_memory( port, ptr, len ); -#endif /* OMPI_MCA_PTL_GM_CACHE_ENABLE */ -#else - return GM_FAILURE; -#endif /* OMPI_MCA_PTL_GM_SUPPORT_REGISTERING */ -} - -gm_status_t mca_ptl_gm_deregister_memory( struct gm_port *port, void *ptr, unsigned len ) -{ -#if OMPI_MCA_PTL_GM_SUPPORT_REGISTERING -#if OMPI_MCA_PTL_GM_CACHE_ENABLE - return gmpi_unuse_interval( port, (gm_up_t)ptr, len ); -#else - return gm_deregister_memory( port, ptr, len ); -#endif /* OMPI_MCA_PTL_GM_CACHE_ENABLE */ -#else - return GM_FAILURE; -#endif /* OMPI_MCA_PTL_GM_SUPPORT_REGISTERING */ -} diff --git a/ompi/mca/ptl/gm/ptl_gm_peer.h b/ompi/mca/ptl/gm/ptl_gm_peer.h deleted file mode 100644 index 28bb083858..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_peer.h +++ /dev/null @@ -1,72 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_GM_PEER_H -#define MCA_PTL_GM_PEER_H - -#include "opal/class/opal_list.h" -#include "ompi/types.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * Structure used to publish GM id information to peers. - */ -struct mca_ptl_gm_addr_t { -#if GM_API_VERSION > 0x200 - unsigned int global_id; -#else - char global_id[GM_MAX_HOST_NAME_LEN]; -#endif /* GM_API_VERSION > 0x200 */ - unsigned int local_id; - unsigned int port_id; -}; - -typedef struct mca_ptl_gm_addr_t mca_ptl_gm_addr_t; - -/** - * An abstraction that represents a connection to a peer process. - */ -struct mca_ptl_gm_peer_t { - opal_list_item_t super; - struct mca_ptl_gm_module_t* peer_ptl; - struct mca_ptl_gm_proc_t* peer_proc; - struct mca_ptl_gm_addr_t peer_addr; /**< address of peer */ - int num_credits; - int max_credits; - int resending; - int num_resend; - bool get_started; -}; -typedef struct mca_ptl_gm_peer_t mca_ptl_gm_peer_t; - -OBJ_CLASS_DECLARATION(mca_ptl_gm_peer_t); - - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/ompi/mca/ptl/gm/ptl_gm_priv.c b/ompi/mca/ptl/gm/ptl_gm_priv.c deleted file mode 100644 index e0a39f152e..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_priv.c +++ /dev/null @@ -1,1005 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. 
- * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" - -#include "ompi/types.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" -#include "ptl_gm.h" -#include "ptl_gm_peer.h" -#include "ptl_gm_proc.h" -#include "ptl_gm_sendfrag.h" -#include "ptl_gm_priv.h" -#include "ompi/mca/pml/teg/pml_teg_proc.h" - -static int mca_ptl_gm_send_quick_fin_message( struct mca_ptl_gm_peer_t* ptl_peer, - struct mca_ptl_base_frag_t* frag ); - -static void mca_ptl_gm_basic_frag_callback( struct gm_port* port, void* context, gm_status_t status ) -{ - mca_ptl_gm_module_t* gm_ptl; - mca_ptl_base_frag_t* frag_base; - mca_ptl_base_header_t* header; - - header = (mca_ptl_base_header_t*)context; - - frag_base = (mca_ptl_base_frag_t*)header->hdr_frag.hdr_src_ptr.pval; - gm_ptl = (mca_ptl_gm_module_t *)frag_base->frag_owner; - - switch( status ) { - case GM_SUCCESS: - OMPI_GM_FREE_LIST_RETURN( &(gm_ptl->gm_send_dma_frags), ((opal_list_item_t*)header) ); - /* release the send token */ - opal_atomic_add( &(gm_ptl->num_send_tokens), 1 ); - break; - case GM_SEND_TIMED_OUT: - opal_output( 0, "send_continue timed out\n" ); - break; - case GM_SEND_DROPPED: - opal_output( 0, "send_continue dropped\n" ); - break; - default: - opal_output( 0, "send_continue other error %d\n", status ); - } -} - -#define DO_DEBUG( INST ) - -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET -static inline -int mca_ptl_gm_receiver_advance_pipeline( mca_ptl_gm_recv_frag_t* frag, int onlyifget ); - -/* This function get called when the gm_get is finish (i.e. when the read from remote memory - * is completed. We have to send back the ack. If the original data was too large for just one - * fragment it will be split in severals. We have to send back for each of these fragments one - * ack. 
- */ -static void mca_ptl_gm_get_callback( struct gm_port *port, void * context, gm_status_t status ) -{ - mca_ptl_gm_recv_frag_t* frag = (mca_ptl_gm_recv_frag_t*)context; - mca_ptl_gm_peer_t* peer = (mca_ptl_gm_peer_t*)frag->frag_recv.frag_base.frag_peer; - - switch( status ) { - case GM_SUCCESS: - DO_DEBUG( opal_output( 0, "receiver %d %p get_callback processed %lld validated %lld", - orte_process_info.my_name->vpid, frag, frag->frag_bytes_processed, frag->frag_bytes_validated ); ) - /* send an ack message to the sender */ - mca_ptl_gm_send_quick_fin_message( peer, &(frag->frag_recv.frag_base) ); - peer->get_started = false; - /* mark the memory as being ready to be deregistered */ - frag->pipeline.lines[frag->pipeline.pos_deregister].flags |= PTL_GM_PIPELINE_DEREGISTER; - mca_ptl_gm_receiver_advance_pipeline( frag, 0 ); - break; - case GM_SEND_TIMED_OUT: - opal_output( 0, "mca_ptl_gm_get_callback timed out\n" ); - break; - case GM_SEND_DROPPED: - opal_output( 0, "mca_ptl_gm_get_callback dropped\n" ); - break; - default: - opal_output( 0, "mca_ptl_gm_get_callback other error %d\n", status ); - } -} - -static inline -int mca_ptl_gm_receiver_advance_pipeline( mca_ptl_gm_recv_frag_t* frag, int onlyifget ) -{ - mca_ptl_gm_peer_t* peer; - gm_status_t status; - mca_ptl_gm_pipeline_line_t *get_line, *reg_line, *dereg_line; - uint64_t length; - DO_DEBUG( int count = 0; char buffer[128]; ) - - peer = (mca_ptl_gm_peer_t*)frag->frag_recv.frag_base.frag_peer; - DO_DEBUG( count = sprintf( buffer, " %p", (void*)frag ); ) - /* start the current get */ - get_line = &(frag->pipeline.lines[frag->pipeline.pos_transfert]); - if( (PTL_GM_PIPELINE_TRANSFERT & get_line->flags) == PTL_GM_PIPELINE_TRANSFERT ) { - peer->get_started = true; - gm_get( peer->peer_ptl->gm_port, get_line->remote_memory.lval, - get_line->local_memory.pval, get_line->length, - GM_LOW_PRIORITY, peer->peer_addr.local_id, peer->peer_addr.port_id, - mca_ptl_gm_get_callback, frag ); - get_line->flags ^= 
PTL_GM_PIPELINE_REMOTE; - DO_DEBUG( count += sprintf( buffer + count, " start get %lld (%d)", get_line->length, frag->pipeline.pos_transfert ); ); - frag->pipeline.pos_transfert = (frag->pipeline.pos_transfert + 1) % GM_PIPELINE_DEPTH; - } else if( 1 == onlyifget ) goto check_completion_status; - - /* register the next segment */ - reg_line = &(frag->pipeline.lines[frag->pipeline.pos_register]); - length = frag->frag_recv.frag_base.frag_size - frag->frag_bytes_processed; - if( (0 != length) && !(reg_line->flags & PTL_GM_PIPELINE_REGISTER) ) { - reg_line->hdr_flags = get_line->hdr_flags; - reg_line->offset = get_line->offset + get_line->length; - reg_line->length = length; - if( reg_line->length > mca_ptl_gm_component.gm_rdma_frag_size ) - reg_line->length = mca_ptl_gm_component.gm_rdma_frag_size; - reg_line->local_memory.lval = 0L; - reg_line->local_memory.pval = (char*)frag->frag_recv.frag_base.frag_addr + - reg_line->offset; - status = mca_ptl_gm_register_memory( peer->peer_ptl->gm_port, reg_line->local_memory.pval, - reg_line->length ); - if( GM_SUCCESS != status ) { - opal_output( 0, "Cannot register receiver memory (%p, %ld) bytes offset %ld\n", - reg_line->local_memory.pval, reg_line->length, reg_line->offset ); - return OMPI_ERROR; - } - DO_DEBUG( count += sprintf( buffer + count, " start register %lld offset %lld processed %lld(%d)", - reg_line->length, reg_line->offset, frag->frag_bytes_processed, - frag->pipeline.pos_register ); ); - reg_line->flags |= PTL_GM_PIPELINE_REGISTER; - frag->frag_bytes_processed += reg_line->length; - frag->pipeline.pos_register = (frag->pipeline.pos_register + 1) % GM_PIPELINE_DEPTH; - } - - /* deregister the previous one */ - dereg_line = &(frag->pipeline.lines[frag->pipeline.pos_deregister]); - if( dereg_line->flags & PTL_GM_PIPELINE_DEREGISTER ) { /* something usefull */ - status = mca_ptl_gm_deregister_memory( peer->peer_ptl->gm_port, - dereg_line->local_memory.pval, dereg_line->length ); - if( GM_SUCCESS != status ) { - 
opal_output( 0, "unpinning receiver memory from get (%p, %u) failed \n", - dereg_line->local_memory.pval, - dereg_line->length ); - } - dereg_line->flags ^= (PTL_GM_PIPELINE_DEREGISTER|PTL_GM_PIPELINE_REGISTER); - assert( dereg_line->flags == 0 ); - frag->frag_bytes_validated += dereg_line->length; - DO_DEBUG( count += sprintf( buffer + count, " start deregister %lld offset %lld (%d)", dereg_line->length, - dereg_line->offset, frag->pipeline.pos_deregister ); ) - frag->pipeline.pos_deregister = (frag->pipeline.pos_deregister + 1) % GM_PIPELINE_DEPTH; - } - check_completion_status: - if( frag->frag_recv.frag_base.frag_size <= frag->frag_bytes_validated ) { - peer->peer_ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)peer->peer_ptl, - frag->frag_recv.frag_request, frag->frag_recv.frag_base.frag_size, - frag->frag_recv.frag_base.frag_size ); - OMPI_FREE_LIST_RETURN( &(peer->peer_ptl->gm_recv_frags_free), (opal_list_item_t*)frag ); - DO_DEBUG( count += sprintf( buffer + count, " finish" ); ) - } - DO_DEBUG( opal_output( 0, "receiver %d %s", orte_process_info.my_name->vpid, buffer ); ) - return OMPI_SUCCESS; -} - -static inline -int mca_ptl_gm_sender_advance_pipeline( mca_ptl_gm_send_frag_t* frag ) -{ - mca_ptl_gm_peer_t* peer; - gm_status_t status; - mca_ptl_gm_pipeline_line_t *send_line, *reg_line, *dereg_line; - mca_ptl_gm_frag_header_t* hdr; - DO_DEBUG( int count = 0; char buffer[256]; ) - - peer = (mca_ptl_gm_peer_t*)frag->frag_send.frag_base.frag_peer; - DO_DEBUG( count = sprintf( buffer, " %p", (void*)frag ); ) - /* send current segment */ - send_line = &(frag->pipeline.lines[frag->pipeline.pos_transfert]); - if( (send_line->flags & PTL_GM_PIPELINE_TRANSFERT) == PTL_GM_PIPELINE_TRANSFERT ) { - opal_list_item_t* item; - int32_t rc; - - OMPI_FREE_LIST_WAIT( &(peer->peer_ptl->gm_send_dma_frags), item, rc ); - opal_atomic_sub( &(peer->peer_ptl->num_send_tokens), 1 ); - hdr = (mca_ptl_gm_frag_header_t*)item; - - hdr->hdr_frag.hdr_common.hdr_type = 
MCA_PTL_HDR_TYPE_FRAG; - hdr->hdr_frag.hdr_common.hdr_flags = send_line->hdr_flags | - frag->frag_send.frag_base.frag_header.hdr_common.hdr_flags; - hdr->hdr_frag.hdr_src_ptr.lval = 0L; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_frag.hdr_src_ptr.pval = frag; - hdr->hdr_frag.hdr_dst_ptr = frag->frag_send.frag_base.frag_header.hdr_ack.hdr_dst_match; - hdr->hdr_frag.hdr_frag_offset = send_line->offset; - hdr->hdr_frag.hdr_frag_length = send_line->length; - hdr->registered_memory = send_line->local_memory; - - gm_send_with_callback( peer->peer_ptl->gm_port, hdr, - GM_SIZE, sizeof(mca_ptl_gm_frag_header_t), - GM_HIGH_PRIORITY, peer->peer_addr.local_id, peer->peer_addr.port_id, - mca_ptl_gm_basic_frag_callback, (void*)hdr ); - - send_line->flags ^= PTL_GM_PIPELINE_REMOTE; - frag->pipeline.pos_transfert = (frag->pipeline.pos_transfert + 1) % GM_PIPELINE_DEPTH; - DO_DEBUG( count += sprintf( buffer + count, " send new fragment %lld", send_line->length ); ) - } - - /* deregister previous segment */ - dereg_line = &(frag->pipeline.lines[frag->pipeline.pos_deregister]); - if( dereg_line->flags & PTL_GM_PIPELINE_DEREGISTER ) { /* something usefull */ - status = mca_ptl_gm_deregister_memory( peer->peer_ptl->gm_port, - dereg_line->local_memory.pval, dereg_line->length ); - if( GM_SUCCESS != status ) { - opal_output( 0, "unpinning receiver memory from get (%p, %u) failed \n", - dereg_line->local_memory.pval, dereg_line->length ); - } - dereg_line->flags ^= (PTL_GM_PIPELINE_REGISTER | PTL_GM_PIPELINE_DEREGISTER); - assert( dereg_line->flags == 0 ); - frag->frag_bytes_validated += dereg_line->length; - frag->pipeline.pos_deregister = (frag->pipeline.pos_deregister + 1) % GM_PIPELINE_DEPTH; - DO_DEBUG( count += sprintf( buffer + count, " start deregister %lld offset %lld (validated %lld)", - dereg_line->length, dereg_line->offset, frag->frag_bytes_validated ); ) - } - - /* register next segment */ - reg_line = &(frag->pipeline.lines[frag->pipeline.pos_register]); - if( 
!(reg_line->flags & PTL_GM_PIPELINE_REGISTER) ) { - reg_line->length = frag->frag_send.frag_base.frag_size - frag->frag_bytes_processed; - if( 0 != reg_line->length ) { - reg_line->hdr_flags = frag->frag_send.frag_base.frag_header.hdr_common.hdr_flags; - if( reg_line->length > mca_ptl_gm_component.gm_rdma_frag_size ) { - reg_line->length = mca_ptl_gm_component.gm_rdma_frag_size; - } else { - reg_line->hdr_flags |= PTL_FLAG_GM_LAST_FRAGMENT; - } - reg_line->offset = send_line->offset + send_line->length; - reg_line->local_memory.lval = 0L; - reg_line->local_memory.pval = (char*)frag->frag_send.frag_base.frag_addr + - reg_line->offset; - status = mca_ptl_gm_register_memory( peer->peer_ptl->gm_port, reg_line->local_memory.pval, - reg_line->length ); - if( GM_SUCCESS != status ) { - opal_output( 0, "Cannot register sender memory (%p, %ld) bytes offset %ld\n", - reg_line->local_memory.pval, reg_line->length, reg_line->offset ); - return OMPI_ERROR; - } - reg_line->flags |= PTL_GM_PIPELINE_TRANSFERT; - frag->frag_bytes_processed += reg_line->length; - frag->pipeline.pos_register = (frag->pipeline.pos_register + 1) % GM_PIPELINE_DEPTH; - DO_DEBUG( count += sprintf( buffer + count, " start register %lld offset %lld", - reg_line->length, reg_line->offset ); ) - } - } - - DO_DEBUG( opal_output( 0, "sender %d %s", orte_process_info.my_name->vpid, buffer ); ) - return OMPI_SUCCESS; -} -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - -static inline -int mca_ptl_gm_send_internal_rndv_header( mca_ptl_gm_peer_t *ptl_peer, - mca_ptl_gm_send_frag_t *fragment, - mca_ptl_gm_frag_header_t* hdr, - int flags ) -{ - struct iovec iov; - uint32_t in_size; - size_t max_data; - int32_t freeAfter; - ompi_convertor_t *convertor = &(fragment->frag_send.frag_base.frag_convertor); - - iov.iov_base = (char*)hdr + sizeof(mca_ptl_gm_frag_header_t); - iov.iov_len = fragment->frag_send.frag_base.frag_size - fragment->frag_bytes_processed; - if( iov.iov_len > (mca_ptl_gm_component.gm_segment_size - 
sizeof(mca_ptl_gm_frag_header_t)) ) - iov.iov_len = (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_gm_frag_header_t)); - max_data = iov.iov_len; - in_size = 1; - - if( ompi_convertor_pack(convertor, &(iov), &in_size, &max_data, &freeAfter) < 0) - return OMPI_ERROR; - - hdr->hdr_frag.hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; - hdr->hdr_frag.hdr_common.hdr_flags = flags; - hdr->hdr_frag.hdr_src_ptr.lval = 0L; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_frag.hdr_src_ptr.pval = fragment; - hdr->hdr_frag.hdr_dst_ptr = fragment->frag_send.frag_request->req_peer_match; - hdr->hdr_frag.hdr_frag_offset = fragment->frag_offset + fragment->frag_bytes_processed; - hdr->hdr_frag.hdr_frag_length = fragment->frag_send.frag_base.frag_size - - fragment->frag_bytes_processed; - hdr->registered_memory.lval = 0L; - hdr->registered_memory.pval = NULL; - - DO_DEBUG( opal_output( 0, "sender %d before send internal rndv header hdr_offset %lld hdr_length %lld max_data %u", - orte_process_info.my_name->vpid, hdr->hdr_frag.hdr_frag_offset, hdr->hdr_frag.hdr_frag_length, max_data ); ); - gm_send_with_callback( ptl_peer->peer_ptl->gm_port, hdr, GM_SIZE, - sizeof(mca_ptl_gm_frag_header_t) + max_data, - GM_LOW_PRIORITY, ptl_peer->peer_addr.local_id, ptl_peer->peer_addr.port_id, - mca_ptl_gm_basic_frag_callback, (void *)hdr ); - fragment->frag_bytes_processed += max_data; - fragment->frag_bytes_validated += max_data; - DO_DEBUG( opal_output( 0, "sender %d after send internal rndv header processed %lld, validated %lld max_data %u", - orte_process_info.my_name->vpid, fragment->frag_bytes_processed, fragment->frag_bytes_validated, max_data ); ); - return OMPI_SUCCESS; -} - -static inline -int mca_ptl_gm_send_burst_data( mca_ptl_gm_peer_t *ptl_peer, - mca_ptl_gm_send_frag_t *fragment, - uint32_t burst_length, - mca_ptl_base_frag_header_t* hdr, - int32_t flags ) -{ - int32_t freeAfter, rc; - uint32_t in_size; - size_t max_data; - struct iovec iov; - ompi_convertor_t *convertor 
= &(fragment->frag_send.frag_base.frag_convertor); - - while( 0 < burst_length ) { /* send everything for the burst_length size */ - if( NULL == hdr ) { - opal_list_item_t* item; - OMPI_FREE_LIST_WAIT( &(ptl_peer->peer_ptl->gm_send_dma_frags), item, rc ); - opal_atomic_sub( &(ptl_peer->peer_ptl->num_send_tokens), 1 ); - hdr = (mca_ptl_base_frag_header_t*)item; - } - iov.iov_base = (char*)hdr + sizeof(mca_ptl_base_frag_header_t); - iov.iov_len = mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_frag_header_t); - if( iov.iov_len >= burst_length ) - iov.iov_len = burst_length; - max_data = iov.iov_len; - in_size = 1; - - if( ompi_convertor_pack(convertor, &(iov), &in_size, &max_data, &freeAfter) < 0) - return OMPI_ERROR; - - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_src_ptr.lval = 0L; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_src_ptr.pval = fragment; - hdr->hdr_dst_ptr = fragment->frag_send.frag_request->req_peer_match; - assert( hdr->hdr_dst_ptr.pval != NULL ); - hdr->hdr_frag_offset = fragment->frag_offset + fragment->frag_bytes_processed; - hdr->hdr_frag_length = max_data; - - fragment->frag_bytes_processed += max_data; - fragment->frag_bytes_validated += max_data; - burst_length -= max_data; - if( fragment->frag_send.frag_base.frag_size == fragment->frag_bytes_processed ) { - assert( 0 == burst_length ); - hdr->hdr_common.hdr_flags |= PTL_FLAG_GM_LAST_FRAGMENT; - } - /* for the last piece set the header type to FIN */ - gm_send_with_callback( ptl_peer->peer_ptl->gm_port, hdr, GM_SIZE, - iov.iov_len + sizeof(mca_ptl_base_frag_header_t), - GM_LOW_PRIORITY, ptl_peer->peer_addr.local_id, ptl_peer->peer_addr.port_id, - mca_ptl_gm_basic_frag_callback, (void*)hdr ); - hdr = NULL; /* force to retrieve a new one on the next loop */ - } - DO_DEBUG( opal_output( 0, "sender %d after burst offset %lld, processed %lld, validated %lld\n", - orte_process_info.my_name->vpid, fragment->frag_offset, 
fragment->frag_bytes_processed, fragment->frag_bytes_validated); ); - return OMPI_SUCCESS; -} - -int mca_ptl_gm_peer_send_continue( mca_ptl_gm_peer_t *ptl_peer, - mca_ptl_gm_send_frag_t *fragment, - struct mca_ptl_base_send_request_t *sendreq, - size_t offset, - size_t *size, - int flags ) -{ - mca_ptl_gm_frag_header_t* hdr; - uint64_t remaining_bytes, burst_length; - opal_list_item_t *item; - int rc = 0; -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET - gm_status_t status; - mca_ptl_gm_pipeline_line_t* pipeline; -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - - fragment->frag_offset = offset; - - /* must update the offset after actual fragment size is determined - * before attempting to send the fragment - */ - mca_ptl_base_send_request_offset( fragment->frag_send.frag_request, - fragment->frag_send.frag_base.frag_size ); - DO_DEBUG( opal_output( 0, "sender %d start new send length %ld offset %ld\n", orte_process_info.my_name->vpid, *size, offset ); ) - /* The first DMA memory buffer has been alocated in same time as the fragment */ - item = (opal_list_item_t*)fragment->send_buf; - hdr = (mca_ptl_gm_frag_header_t*)item; - remaining_bytes = fragment->frag_send.frag_base.frag_size - fragment->frag_bytes_processed; - if( remaining_bytes < mca_ptl_gm_component.gm_eager_limit ) { - burst_length = remaining_bytes; - } else { -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET - if( remaining_bytes < mca_ptl_gm_component.gm_rndv_burst_limit ) { - burst_length = remaining_bytes % (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_frag_header_t)); - } else { - if( mca_ptl_gm_component.gm_rdma_frag_size == UINT_MAX ) - burst_length = 0; - else - burst_length = remaining_bytes % mca_ptl_gm_component.gm_rdma_frag_size; - } -#else - /*burst_length = remaining_bytes % (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_frag_header_t));*/ - burst_length = (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_frag_header_t)); -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - } - - if( 
burst_length > 0 ) { - mca_ptl_gm_send_burst_data( ptl_peer, fragment, burst_length, &(hdr->hdr_frag), flags ); - item = NULL; /* this buffer was already used by the mca_ptl_gm_send_burst_data function */ - DO_DEBUG( opal_output( 0, "sender %d burst %ld bytes", orte_process_info.my_name->vpid, burst_length ); ); - } - - if( fragment->frag_send.frag_base.frag_size == fragment->frag_bytes_processed ) { - *size = fragment->frag_bytes_processed; - if( !(flags & MCA_PTL_FLAGS_ACK) ) { - ptl_peer->peer_ptl->super.ptl_send_progress( (mca_ptl_base_module_t*)ptl_peer->peer_ptl, - fragment->frag_send.frag_request, - (*size) ); - OMPI_FREE_LIST_RETURN( &(ptl_peer->peer_ptl->gm_send_frags), ((opal_list_item_t*)fragment) ); - } - return OMPI_SUCCESS; - } - if( NULL == item ) { - OMPI_FREE_LIST_WAIT( &(ptl_peer->peer_ptl->gm_send_dma_frags), item, rc ); - opal_atomic_sub( &(ptl_peer->peer_ptl->num_send_tokens), 1 ); - hdr = (mca_ptl_gm_frag_header_t*)item; - } - - /* Large set of data => we have to setup a rendez-vous protocol. Here we can - * use the match header already filled in by the upper level and just complete it - * with the others informations. When we reach this point the rendez-vous protocol - * has already been realized so we know that the receiver expect our message. 
- */ -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET - /* Trigger the long rendez-vous protocol only if gm_get is supported */ - if( remaining_bytes > mca_ptl_gm_component.gm_rndv_burst_limit ) - flags |= PTL_FLAG_GM_REQUIRE_LOCK; -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - mca_ptl_gm_send_internal_rndv_header( ptl_peer, fragment, hdr, flags ); - if( !(PTL_FLAG_GM_REQUIRE_LOCK & flags) ) - return OMPI_SUCCESS; - -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET - pipeline = &(fragment->pipeline.lines[0]); - pipeline->length = fragment->frag_send.frag_base.frag_size - fragment->frag_bytes_processed; - if( pipeline->length > mca_ptl_gm_component.gm_rdma_frag_size ) { - pipeline->length = mca_ptl_gm_component.gm_rdma_frag_size; - } - pipeline->offset = fragment->frag_offset + fragment->frag_bytes_processed; - pipeline->hdr_flags = fragment->frag_send.frag_base.frag_header.hdr_common.hdr_flags; - pipeline->local_memory.lval = 0L; - pipeline->local_memory.pval = (char*)fragment->frag_send.frag_base.frag_addr + pipeline->offset; - status = mca_ptl_gm_register_memory( ptl_peer->peer_ptl->gm_port, pipeline->local_memory.pval, - pipeline->length ); - if( GM_SUCCESS != status ) { - opal_output( 0, "Cannot register sender memory (%p, %ld) bytes offset %ld\n", - pipeline->local_memory.pval, pipeline->length, pipeline->offset ); - } - pipeline->flags = PTL_GM_PIPELINE_TRANSFERT; - fragment->frag_bytes_processed += pipeline->length; - DO_DEBUG( opal_output( 0, "sender %d %p start register %lld (%d)", orte_process_info.my_name->vpid, - fragment, pipeline->length, fragment->pipeline.pos_register ); ) - fragment->pipeline.pos_register = (fragment->pipeline.pos_register + 1) % GM_PIPELINE_DEPTH; - /* Now we are waiting for the ack message. Meanwhile we can register the sender first piece - * of data. In this way we have a recovery between the expensive registration on both sides. 
- */ -#else - assert( 0 ); -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - return OMPI_SUCCESS; -} - -static void send_match_callback( struct gm_port* port, void* context, gm_status_t status ) -{ - mca_ptl_gm_module_t* gm_ptl; - mca_ptl_base_header_t* header = (mca_ptl_base_header_t*)context; - - gm_ptl = (mca_ptl_gm_module_t*)((long)header->hdr_rndv.hdr_frag_length); - - OMPI_GM_FREE_LIST_RETURN( &(gm_ptl->gm_send_dma_frags), ((opal_list_item_t*)header) ); - /* release the send token */ - opal_atomic_add( &(gm_ptl->num_send_tokens), 1 ); -} - -/* This function is used for the initial send. For small size messages the data will be attached - * to the header, when for long size messages we will setup a rendez-vous protocol. We dont need - * to fill a fragment description here as all that we need is the request pointer. In same time - * even if we fill a fragment it will be lost as soon as we get the answer from the remote node - * and we will be unable to reuse any informations stored inside (like the convertor). 
- */ -int mca_ptl_gm_peer_send( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_ptl_base_send_request_t *sendreq, - size_t offset, - size_t size, - int flags ) -{ - const int header_length = sizeof(mca_ptl_base_rendezvous_header_t); - mca_ptl_base_header_t* hdr; - mca_ptl_gm_module_t* ptl_gm = (mca_ptl_gm_module_t*)ptl; - ompi_convertor_t *convertor = NULL; - int rc, freeAfter; - size_t max_data = 0; - mca_ptl_gm_peer_t* ptl_peer = (mca_ptl_gm_peer_t*)ptl_base_peer; - opal_list_item_t *item; - char* sendbuf; - - OMPI_FREE_LIST_WAIT( &(ptl_gm->gm_send_dma_frags), item, rc ); - opal_atomic_sub( &(ptl_gm->num_send_tokens), 1 ); - sendbuf = (char*)item; - - hdr = (mca_ptl_base_header_t*)item; - - /* Populate the header with the match informations */ - (void)mca_ptl_gm_init_header_rndv( hdr, sendreq, flags ); - hdr->hdr_rndv.hdr_frag_length = (uint64_t)((long)ptl); - - if( size > 0 ) { - struct iovec iov; - uint32_t iov_count = 1; - - convertor = &sendreq->req_send.req_convertor; - /* We send here the first fragment, and the convertor does not need any - * particular options. Thus, we can use the one already prepared on the - * request. 
- */ - - if( (size + header_length) <= mca_ptl_gm_component.gm_segment_size ) - iov.iov_len = size; - else - iov.iov_len = mca_ptl_gm_component.gm_segment_size - header_length; - - /* copy the data to the registered buffer */ - iov.iov_base = ((char*)hdr) + header_length; - max_data = iov.iov_len; - if((rc = ompi_convertor_pack(convertor, &(iov), &iov_count, &max_data, &freeAfter)) < 0) - return OMPI_ERROR; - - assert( max_data != 0 ); - /* must update the offset after actual fragment size is determined - * before attempting to send the fragment - */ - mca_ptl_base_send_request_offset( sendreq, max_data ); - } - /* Send the first fragment */ - gm_send_with_callback( ptl_gm->gm_port, hdr, - GM_SIZE, max_data + header_length, GM_LOW_PRIORITY, - ptl_peer->peer_addr.local_id, ptl_peer->peer_addr.port_id, - send_match_callback, (void *)hdr ); - - if( !(flags & MCA_PTL_FLAGS_ACK) ) { - ptl->ptl_send_progress( ptl, sendreq, max_data ); - DO_DEBUG( opal_output( 0, "sender %d complete request %p w/o rndv with %d bytes", - orte_process_info.my_name->vpid, sendreq, max_data ); ); - } else { - DO_DEBUG( opal_output( 0, "sender %d sent request %p for rndv with %d bytes", - orte_process_info.my_name->vpid, sendreq, max_data ); ); - } - - return OMPI_SUCCESS; -} - -static mca_ptl_gm_recv_frag_t* -mca_ptl_gm_recv_frag_ctrl( struct mca_ptl_gm_module_t *ptl, - mca_ptl_base_header_t * header, uint32_t msg_len ) -{ - mca_ptl_base_send_request_t *req; - - assert( MCA_PTL_FLAGS_ACK & header->hdr_common.hdr_flags ); - req = (mca_ptl_base_send_request_t*)(header->hdr_ack.hdr_src_ptr.pval); - req->req_peer_match = header->hdr_ack.hdr_dst_match; - req->req_peer_addr = header->hdr_ack.hdr_dst_addr; - req->req_peer_size = header->hdr_ack.hdr_dst_size; - DO_DEBUG( opal_output( 0, "sender %d get back the rendez-vous for request %p", - orte_process_info.my_name->vpid, req ); ); - ptl->super.ptl_send_progress( (mca_ptl_base_module_t*)ptl, req, req->req_offset ); - - return NULL; -} - -/* We get a 
RNDV header in two situations: - * - when the remote node need a ack - * - when we set a rendez-vous protocol with the remote node. - * In both cases we have to send an ack back. - */ -static mca_ptl_gm_recv_frag_t* -mca_ptl_gm_recv_frag_match( struct mca_ptl_gm_module_t *ptl, - mca_ptl_base_header_t* hdr, uint32_t msg_len ) -{ - mca_ptl_gm_recv_frag_t* recv_frag; - bool matched; - - /* allocate a receive fragment */ - recv_frag = mca_ptl_gm_alloc_recv_frag( (struct mca_ptl_base_module_t*)ptl ); - - if( MCA_PTL_HDR_TYPE_MATCH == hdr->hdr_rndv.hdr_match.hdr_common.hdr_type ) { - recv_frag->frag_recv.frag_base.frag_addr = - (char*)hdr + sizeof(mca_ptl_base_match_header_t); - recv_frag->frag_recv.frag_base.frag_size = hdr->hdr_match.hdr_msg_length; - } else { - assert( MCA_PTL_HDR_TYPE_RNDV == hdr->hdr_rndv.hdr_match.hdr_common.hdr_type ); - recv_frag->frag_recv.frag_base.frag_addr = - (char*)hdr + sizeof(mca_ptl_base_rendezvous_header_t); - recv_frag->frag_recv.frag_base.frag_size = hdr->hdr_rndv.hdr_match.hdr_msg_length; - } - recv_frag->frag_recv.frag_is_buffered = false; - recv_frag->have_allocated_buffer = false; - recv_frag->attached_data_length = msg_len - sizeof(mca_ptl_base_rendezvous_header_t); - recv_frag->frag_recv.frag_base.frag_peer = NULL; - recv_frag->frag_recv.frag_base.frag_header.hdr_rndv = hdr->hdr_rndv; - matched = ptl->super.ptl_match( &(ptl->super), - &(recv_frag->frag_recv), - &(recv_frag->frag_recv.frag_base.frag_header.hdr_match) ); - if( true == matched ) return NULL; /* done and fragment already removed */ - - /* get some memory and copy the data inside. 
We can then release the receive buffer */ - if( 0 != recv_frag->attached_data_length ) { - char* ptr = (char*)mca_ptl_gm_get_local_buffer(); - recv_frag->have_allocated_buffer = true; - memcpy( ptr, recv_frag->frag_recv.frag_base.frag_addr, recv_frag->attached_data_length ); - recv_frag->frag_recv.frag_base.frag_addr = ptr; - } else { - recv_frag->frag_recv.frag_base.frag_addr = NULL; - } - recv_frag->matched = false; - - return recv_frag; -} - -static void recv_short_callback( struct gm_port* port, void* context, gm_status_t status ) -{ - mca_ptl_gm_module_t* gm_ptl; - mca_ptl_base_frag_t* frag_base; - mca_ptl_base_ack_header_t* header; - - header = (mca_ptl_base_ack_header_t*)context; - - frag_base = (mca_ptl_base_frag_t*)header->hdr_dst_match.pval; - gm_ptl = (mca_ptl_gm_module_t *)frag_base->frag_owner; - - OMPI_GM_FREE_LIST_RETURN( &(gm_ptl->gm_send_dma_frags), ((opal_list_item_t*)header) ); - /* release the send token */ - opal_atomic_add( &(gm_ptl->num_send_tokens), 1 ); -} - -static int mca_ptl_gm_send_quick_fin_message( struct mca_ptl_gm_peer_t* ptl_peer, - struct mca_ptl_base_frag_t* frag ) -{ - opal_list_item_t *item; - mca_ptl_base_header_t *hdr; - int rc; - - OMPI_FREE_LIST_WAIT( &(ptl_peer->peer_ptl->gm_send_dma_frags), item, rc ); - opal_atomic_sub( &(ptl_peer->peer_ptl->num_send_tokens), 1 ); - hdr = (mca_ptl_base_header_t*)item; - - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FIN; - hdr->hdr_common.hdr_flags = PTL_FLAG_GM_HAS_FRAGMENT | frag->frag_header.hdr_common.hdr_flags; - hdr->hdr_ack.hdr_src_ptr.pval = frag->frag_header.hdr_frag.hdr_src_ptr.pval; - hdr->hdr_ack.hdr_dst_match.lval = 0; - hdr->hdr_ack.hdr_dst_match.pval = frag; - hdr->hdr_ack.hdr_dst_addr.lval = 0; /*we are filling both p and val of dest address */ - hdr->hdr_ack.hdr_dst_addr.pval = NULL; - hdr->hdr_ack.hdr_dst_size = frag->frag_header.hdr_frag.hdr_frag_length; - - gm_send_with_callback(ptl_peer->peer_ptl->gm_port, hdr, - GM_SIZE, sizeof(mca_ptl_base_ack_header_t), - 
GM_HIGH_PRIORITY, ptl_peer->peer_addr.local_id, ptl_peer->peer_addr.port_id, - recv_short_callback, (void*)hdr ); - DO_DEBUG( opal_output( 0, "receiver %d %p send quick message for length %lld", orte_process_info.my_name->vpid, - frag, frag->frag_header.hdr_frag.hdr_frag_length ); ) - return OMPI_SUCCESS; -} - -static mca_ptl_gm_recv_frag_t* -mca_ptl_gm_recv_frag_frag( struct mca_ptl_gm_module_t* ptl, - mca_ptl_gm_frag_header_t* hdr, uint32_t msg_len ) -{ - mca_ptl_base_recv_request_t *request; - ompi_convertor_t local_convertor, *convertor; - struct iovec iov; - uint32_t iov_count, header_length; - size_t max_data = 0; - int32_t freeAfter, rc; - mca_ptl_gm_recv_frag_t* frag; - - header_length = sizeof(mca_ptl_base_frag_header_t); - if( hdr->hdr_frag.hdr_common.hdr_flags & PTL_FLAG_GM_HAS_FRAGMENT ) { - frag = (mca_ptl_gm_recv_frag_t*)hdr->hdr_frag.hdr_dst_ptr.pval; - frag->frag_recv.frag_base.frag_header.hdr_frag = hdr->hdr_frag; - request = (mca_ptl_base_recv_request_t*)frag->frag_recv.frag_request; - /* here we can have a synchronisation problem if several threads work in same time - * with the same request. The only question is if it's possible ? 
- */ - convertor = &(frag->frag_recv.frag_base.frag_convertor); - DO_DEBUG( opal_output( 0, "receiver %d get message tagged as HAS_FRAGMENT", orte_process_info.my_name->vpid ); ); - if( PTL_FLAG_GM_REQUIRE_LOCK & hdr->hdr_frag.hdr_common.hdr_flags ) - header_length = sizeof(mca_ptl_gm_frag_header_t); - } else { - request = (mca_ptl_base_recv_request_t*)hdr->hdr_frag.hdr_dst_ptr.pval; - - if( hdr->hdr_frag.hdr_frag_length <= (mca_ptl_gm_component.gm_segment_size - - sizeof(mca_ptl_base_frag_header_t)) ) { - convertor = &local_convertor; - request->req_recv.req_base.req_proc = - ompi_comm_peer_lookup( request->req_recv.req_base.req_comm, - request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE ); - frag = NULL; - } else { /* large message => we have to create a receive fragment */ - frag = mca_ptl_gm_alloc_recv_frag( (struct mca_ptl_base_module_t*)ptl ); - frag->frag_recv.frag_request = request; - frag->frag_offset = hdr->hdr_frag.hdr_frag_offset; - frag->matched = true; - frag->frag_recv.frag_base.frag_addr = frag->frag_recv.frag_request->req_recv.req_base.req_addr; - frag->frag_recv.frag_base.frag_size = hdr->hdr_frag.hdr_frag_length; - frag->frag_recv.frag_base.frag_peer = (struct mca_ptl_base_peer_t*) - mca_pml_teg_proc_lookup_remote_peer( request->req_recv.req_base.req_comm, - request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE, - (struct mca_ptl_base_module_t*)ptl ); - /* send an ack message to the sender ... 
quick hack (TODO) */ - frag->frag_recv.frag_base.frag_header.hdr_frag = hdr->hdr_frag; - frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_length = 0; - mca_ptl_gm_send_quick_fin_message( (mca_ptl_gm_peer_t*)frag->frag_recv.frag_base.frag_peer, - &(frag->frag_recv.frag_base) ); - header_length = sizeof(mca_ptl_gm_frag_header_t); - frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_length = hdr->hdr_frag.hdr_frag_length; - convertor = &(frag->frag_recv.frag_base.frag_convertor); - DO_DEBUG( opal_output( 0, "receiver %d create fragment with offset %lld and length %lld", - orte_process_info.my_name->vpid, frag->frag_offset, frag->frag_recv.frag_base.frag_size ); ); - } - /* GM does not use any of the convertor specializations, so we can just clone the - * standard convertor attached to the request and set the position. - */ - ompi_convertor_clone_with_position( &(request->req_recv.req_convertor), - convertor, 1, - (size_t*)&(hdr->hdr_frag.hdr_frag_offset) ); - } - - if( header_length != msg_len ) { - iov.iov_base = (char*)hdr + header_length; - iov.iov_len = msg_len - header_length; - iov_count = 1; - max_data = iov.iov_len; - freeAfter = 0; /* unused here */ - rc = ompi_convertor_unpack( convertor, &iov, &iov_count, &max_data, &freeAfter ); - assert( 0 == freeAfter ); - /* If we are in a short burst mode then update the request */ - if( NULL == frag ) { - ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)ptl, request, max_data, max_data ); - return NULL; - } - } - - /* Update the status of the fragment depending on the amount of data converted so far */ - frag->frag_bytes_processed += max_data; - frag->frag_bytes_validated += max_data; - if( !(PTL_FLAG_GM_REQUIRE_LOCK & hdr->hdr_frag.hdr_common.hdr_flags) ) { - if( frag->frag_bytes_validated == frag->frag_recv.frag_base.frag_size ) { - ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)ptl, request, - frag->frag_recv.frag_base.frag_size, - frag->frag_recv.frag_base.frag_size ); - 
OMPI_FREE_LIST_RETURN( &(((mca_ptl_gm_peer_t*)frag->frag_recv.frag_base.frag_peer)->peer_ptl->gm_recv_frags_free), (opal_list_item_t*)frag ); - } - DO_DEBUG( opal_output( 0, "receiver %d waiting for burst with fragment ...", orte_process_info.my_name->vpid ); ); - return NULL; - } - -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET - { - mca_ptl_gm_pipeline_line_t* pipeline; - - /* There is a kind of rendez-vous protocol used internally by the GM driver. If the amount of data - * to transfert is large enough, then the sender will start sending a frag message with the - * remote_memory set to NULL (but with the length set to the length of the first fragment). - * It will allow the receiver to start to register it's own memory. Later when the receiver - * get a fragment with the remote_memory field not NULL it can start getting the data. - */ - if( NULL == hdr->registered_memory.pval ) { /* first round of the local rendez-vous protocol */ - pipeline = &(frag->pipeline.lines[0]); - pipeline->hdr_flags = hdr->hdr_frag.hdr_common.hdr_flags; - pipeline->offset = frag->frag_offset + frag->frag_bytes_processed; - pipeline->length = 0; /* we can lie about this one */ - mca_ptl_gm_receiver_advance_pipeline( frag, 0 ); - } else { - pipeline = &(frag->pipeline.lines[frag->pipeline.pos_remote]); - DO_DEBUG( opal_output( 0, "receiver %d %p get remote memory length %lld (%d)\n", - orte_process_info.my_name->vpid, frag, hdr->hdr_frag.hdr_frag_length, frag->pipeline.pos_remote ); ); - frag->pipeline.pos_remote = (frag->pipeline.pos_remote + 1) % GM_PIPELINE_DEPTH; - assert( (pipeline->flags & PTL_GM_PIPELINE_REMOTE) == 0 ); - pipeline->remote_memory = hdr->registered_memory; - pipeline->flags |= PTL_GM_PIPELINE_REMOTE; - mca_ptl_gm_receiver_advance_pipeline( frag, 0 ); - } - } -#else - assert( 0 ); -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - - return NULL; -} - -static mca_ptl_gm_recv_frag_t* -mca_ptl_gm_recv_frag_fin( struct mca_ptl_gm_module_t* ptl, - mca_ptl_base_header_t* hdr, uint32_t 
msg_len ) -{ - mca_ptl_gm_send_frag_t* frag; - - frag = (mca_ptl_gm_send_frag_t*)hdr->hdr_ack.hdr_src_ptr.pval; - - frag->frag_send.frag_base.frag_header.hdr_common.hdr_flags = hdr->hdr_common.hdr_flags; - frag->frag_send.frag_base.frag_header.hdr_ack.hdr_dst_match = hdr->hdr_ack.hdr_dst_match; - frag->frag_send.frag_request->req_peer_match = hdr->hdr_ack.hdr_dst_match; - if( PTL_FLAG_GM_REQUIRE_LOCK & hdr->hdr_common.hdr_flags ) { -#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET - if( 0 == hdr->hdr_ack.hdr_dst_size ) { - DO_DEBUG( opal_output( 0, "sender %d %p get FIN message (initial)", orte_process_info.my_name->vpid, frag ); ); - /* I just receive the ack for the first fragment => setup the pipeline */ - mca_ptl_gm_sender_advance_pipeline( frag ); - } else { - /* mark the memory as ready to be deregistered */ - frag->pipeline.lines[frag->pipeline.pos_deregister].flags |= PTL_GM_PIPELINE_DEREGISTER; - DO_DEBUG( opal_output( 0, "sender %d %p get FIN message (%d)", orte_process_info.my_name->vpid, frag, frag->pipeline.pos_deregister ); ); - } - /* continue the pipeline ... 
send the next segment */ - mca_ptl_gm_sender_advance_pipeline( frag ); -#else - assert( 0 ); -#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */ - } else { - DO_DEBUG( opal_output( 0, "sender %d burst data after rendez-vous protocol", orte_process_info.my_name->vpid ); ); - /* do a burst but with the remote fragment as we just get it from the message */ - mca_ptl_gm_send_burst_data( (mca_ptl_gm_peer_t*)frag->frag_send.frag_base.frag_peer, frag, - frag->frag_send.frag_base.frag_size - frag->frag_bytes_validated, - NULL, hdr->hdr_common.hdr_flags ); - } - if( frag->frag_send.frag_base.frag_size == frag->frag_bytes_validated ) { - DO_DEBUG( opal_output( 0, "sender %d complete send operation", orte_process_info.my_name->vpid ); ); - ptl->super.ptl_send_progress( (mca_ptl_base_module_t*)ptl, - frag->frag_send.frag_request, - frag->frag_bytes_validated ); - OMPI_FREE_LIST_RETURN( &(ptl->gm_send_frags), (opal_list_item_t*)frag ); - } - - return NULL; -} - -void mca_ptl_gm_outstanding_recv( struct mca_ptl_gm_module_t *ptl ) -{ - mca_ptl_gm_recv_frag_t * frag = NULL; - int size; - bool matched; - - size = opal_list_get_size (&ptl->gm_recv_outstanding_queue); - - if (size > 0) { - frag = (mca_ptl_gm_recv_frag_t *) - opal_list_remove_first( (opal_list_t *)&(ptl->gm_recv_outstanding_queue) ); - - - matched = ptl->super.ptl_match( &(ptl->super), &(frag->frag_recv), - &(frag->frag_recv.frag_base.frag_header.hdr_match) ); - - if(!matched) { - opal_list_append((opal_list_t *)&(ptl->gm_recv_outstanding_queue), - (opal_list_item_t *) frag); - } else { - /* if allocated buffer, free the buffer */ - /* return the recv descriptor to the free list */ - OMPI_FREE_LIST_RETURN(&(ptl->gm_recv_frags_free), (opal_list_item_t *)frag); - } - } -} - -/* - * - */ -typedef mca_ptl_gm_recv_frag_t* (mca_ptl_gm_frag_management_fct_t)( struct mca_ptl_gm_module_t *ptl, - mca_ptl_base_header_t *hdr, - uint32_t msg_len ); - -mca_ptl_gm_frag_management_fct_t* mca_ptl_gm_frag_management_fct[MCA_PTL_HDR_TYPE_MAX] = 
{ - NULL, /* empty no header type equal to zero */ - NULL, /* mca_ptl_gm_recv_frag_match, */ - mca_ptl_gm_recv_frag_match, - (mca_ptl_gm_frag_management_fct_t*)mca_ptl_gm_recv_frag_frag, /* force the conversion to remove a warning */ - mca_ptl_gm_recv_frag_ctrl, - NULL, - NULL, - mca_ptl_gm_recv_frag_fin, - NULL }; - -int mca_ptl_gm_analyze_recv_event( struct mca_ptl_gm_module_t* ptl, gm_recv_event_t* event ) -{ - mca_ptl_base_header_t *header = NULL, *release_buf; - mca_ptl_gm_frag_management_fct_t* function; - uint32_t priority = GM_HIGH_PRIORITY, msg_len; - - release_buf = (mca_ptl_base_header_t*)gm_ntohp(event->recv.buffer); - - switch (gm_ntohc(event->recv.type)) { - case GM_FAST_RECV_EVENT: - case GM_FAST_PEER_RECV_EVENT: - priority = GM_LOW_PRIORITY; - case GM_FAST_HIGH_RECV_EVENT: - case GM_FAST_HIGH_PEER_RECV_EVENT: - header = (mca_ptl_base_header_t *)gm_ntohp(event->recv.message); - break; - case GM_RECV_EVENT: - case GM_PEER_RECV_EVENT: - priority = GM_LOW_PRIORITY; - case GM_HIGH_RECV_EVENT: - case GM_HIGH_PEER_RECV_EVENT: - header = release_buf; - break; - case GM_NO_RECV_EVENT: - - default: - gm_unknown(ptl->gm_port, event); - return 1; - } - - assert( header->hdr_common.hdr_type < MCA_PTL_HDR_TYPE_MAX ); - function = mca_ptl_gm_frag_management_fct[header->hdr_common.hdr_type]; - assert( NULL != function ); - - msg_len = gm_ntohl( event->recv.length ); - (void)function( ptl, header, msg_len ); - - gm_provide_receive_buffer( ptl->gm_port, release_buf, GM_SIZE, priority ); - - return 0; -} - - -void mca_ptl_gm_dump_header( char* str, mca_ptl_base_header_t* hdr ) -{ - switch( hdr->hdr_common.hdr_type ) { - case MCA_PTL_HDR_TYPE_MATCH: - goto print_match_hdr; - case MCA_PTL_HDR_TYPE_RNDV: - goto print_rndv_hdr; - case MCA_PTL_HDR_TYPE_FRAG: - goto print_frag_hdr; - case MCA_PTL_HDR_TYPE_ACK: - goto print_ack_hdr; - case MCA_PTL_HDR_TYPE_NACK: - goto print_ack_hdr; - case MCA_PTL_HDR_TYPE_GET: - goto print_match_hdr; - case MCA_PTL_HDR_TYPE_FIN: - goto 
print_ack_hdr; - case MCA_PTL_HDR_TYPE_FIN_ACK: - goto print_match_hdr; - default: - opal_output( 0, "unknown header of type %d\n", hdr->hdr_common.hdr_type ); - } - return; - - print_ack_hdr: - opal_output( 0, "%s hdr_common hdr_type %d hdr_flags %x\nack header hdr_src_ptr (lval %lld, pval %p)\n hdr_dst_match (lval %lld pval %p)\n hdr_dst_addr (lval %lld pval %p)\n hdr_dst_size %lld\n", - str, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, - hdr->hdr_ack.hdr_src_ptr.lval, hdr->hdr_ack.hdr_src_ptr.pval, - hdr->hdr_ack.hdr_dst_match.lval, hdr->hdr_ack.hdr_dst_match.pval, - hdr->hdr_ack.hdr_dst_addr.lval, hdr->hdr_ack.hdr_dst_addr.pval, hdr->hdr_ack.hdr_dst_size ); - return; - print_frag_hdr: - opal_output( 0, "%s hdr_common hdr_type %d hdr_flags %x\nfrag header hdr_frag_length %lld hdr_frag_offset %lld\n hdr_src_ptr (lval %lld, pval %p)\n hdr_dst_ptr (lval %lld, pval %p)\n", - str, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, - hdr->hdr_frag.hdr_frag_length, hdr->hdr_frag.hdr_frag_offset, hdr->hdr_frag.hdr_src_ptr.lval, - hdr->hdr_frag.hdr_src_ptr.pval, hdr->hdr_frag.hdr_dst_ptr.lval, hdr->hdr_frag.hdr_dst_ptr.pval ); - return; - print_match_hdr: - opal_output( 0, "%s hdr_common hdr_type %d hdr_flags %x\nmatch header hdr_contextid %d hdr_src %d hdr_dst %d hdr_tag %d\n hdr_msg_length %lld hdr_msg_seq %d\n", - str, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, - hdr->hdr_match.hdr_contextid, hdr->hdr_match.hdr_src, hdr->hdr_match.hdr_dst, - hdr->hdr_match.hdr_tag, hdr->hdr_match.hdr_msg_length, hdr->hdr_match.hdr_msg_seq ); - return; - print_rndv_hdr: - opal_output( 0, "%s hdr_common hdr_type %d hdr_flags %x\nrndv header hdr_contextid %d hdr_src %d hdr_dst %d hdr_tag %d\n hdr_msg_length %lld hdr_msg_seq %d\n hdr_frag_length %lld hdr_src_ptr (lval %lld, pval %p)\n", - str, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, - hdr->hdr_rndv.hdr_match.hdr_contextid, hdr->hdr_rndv.hdr_match.hdr_src, - hdr->hdr_rndv.hdr_match.hdr_dst, 
hdr->hdr_rndv.hdr_match.hdr_tag, - hdr->hdr_rndv.hdr_match.hdr_msg_length, hdr->hdr_rndv.hdr_match.hdr_msg_seq, - hdr->hdr_rndv.hdr_frag_length, hdr->hdr_rndv.hdr_src_ptr.lval, hdr->hdr_rndv.hdr_src_ptr.pval); - return; -} diff --git a/ompi/mca/ptl/gm/ptl_gm_priv.h b/ompi/mca/ptl/gm/ptl_gm_priv.h deleted file mode 100644 index bc22303b66..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_priv.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "gm.h" - -struct mca_ptl_gm_send_frag_t; -struct mca_ptl_gm_peer_t; -struct mca_ptl_gm_module_t; - -/* Pinning down memory pages is a costly operation. We can avoid it by using a LRU list - * of pinned down memory, managed inside the GM PTL. 
- */ -gm_status_t mca_ptl_gm_register_memory( struct gm_port *port, void *ptr, unsigned len ); -gm_status_t mca_ptl_gm_deregister_memory( struct gm_port *port, void *ptr, unsigned len ); -#if OMPI_MCA_PTL_GM_CACHE_ENABLE -void mca_ptl_gm_regcache_init(void); -unsigned int gmpi_use_interval(struct gm_port *gmpi_gm_port, gm_up_t start, unsigned int length); -gm_status_t gmpi_unuse_interval(struct gm_port *gmpi_gm_port, gm_up_t start, unsigned int length); -void gmpi_clear_interval(struct gm_port *gmpi_gm_port, gm_up_t start, unsigned int length); -void gmpi_clear_all_intervals(void); -#endif /* OMPI_MCA_PTL_GM_CACHE_ENABLE */ - -/* Some flags that have to go in the header hdr_common.hdr_flags field */ -#define PTL_FLAG_GM_HAS_FRAGMENT 0x04 -#define PTL_FLAG_GM_LAST_FRAGMENT 0x08 -#define PTL_FLAG_GM_REQUIRE_LOCK 0x10 - -/* Internal flags for handling long messages */ -#define GM_PTL_REGISTER_MEMORY 0x01 -#define GM_PTL_SEND_MESSAGE 0x02 - -int mca_ptl_gm_analyze_recv_event( struct mca_ptl_gm_module_t* ptl, gm_recv_event_t* event ); - -void mca_ptl_gm_outstanding_recv( struct mca_ptl_gm_module_t *ptl); - -int mca_ptl_gm_peer_send( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_ptl_base_send_request_t *sendreq, - size_t offset, - size_t size, - int flags ); - -int -mca_ptl_gm_peer_send_continue( struct mca_ptl_gm_peer_t *ptl_peer, - struct mca_ptl_gm_send_frag_t *fragment, - struct mca_ptl_base_send_request_t *sendreq, - size_t offset, - size_t *size, - int flags ); diff --git a/ompi/mca/ptl/gm/ptl_gm_proc.c b/ompi/mca/ptl/gm/ptl_gm_proc.c deleted file mode 100644 index 232c322ac0..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_proc.c +++ /dev/null @@ -1,184 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "opal/sys/atomic.h" -#include "opal/class/opal_hash_table.h" -#include "ompi/mca/pml/base/pml_base_module_exchange.h" -#include "ompi/proc/proc.h" -#include "opal/util/output.h" -#include "ptl_gm.h" -#include "ptl_gm_peer.h" -#include "ptl_gm_proc.h" -#include "ptl_gm_priv.h" - -static void mca_ptl_gm_proc_construct (mca_ptl_gm_proc_t * proc); -static void mca_ptl_gm_proc_destruct (mca_ptl_gm_proc_t * proc); -static mca_ptl_gm_proc_t *mca_ptl_gm_proc_lookup_ompi (ompi_proc_t * - ompi_proc); - -opal_class_t mca_ptl_gm_proc_t_class = { - "mca_ptl_gm_proc_t", - OBJ_CLASS (opal_list_item_t), - (opal_construct_t) mca_ptl_gm_proc_construct, - (opal_destruct_t) mca_ptl_gm_proc_destruct -}; - - -/** - * Initialize gm proc instance - */ - -void -mca_ptl_gm_proc_construct (mca_ptl_gm_proc_t * proc) -{ - proc->proc_ompi = NULL; - proc->proc_addrs = NULL; - proc->proc_addr_count = 0; - proc->peer_arr = NULL; - proc->proc_peer_count = 0; - - OBJ_CONSTRUCT (&proc->proc_lock, opal_mutex_t); - - /* add to list of all proc instance */ - OPAL_THREAD_LOCK (&mca_ptl_gm_component.gm_lock); - opal_list_append (&mca_ptl_gm_component.gm_procs, &proc->super); - OPAL_THREAD_UNLOCK (&mca_ptl_gm_component.gm_lock); - - return; -} - - -/* - * Cleanup gm proc instance - */ - -void -mca_ptl_gm_proc_destruct (mca_ptl_gm_proc_t * proc) -{ - /* remove from list of all proc instances */ - OPAL_THREAD_LOCK (&mca_ptl_gm_component.gm_lock); - 
opal_list_remove_item (&mca_ptl_gm_component.gm_procs, &proc->super); - OPAL_THREAD_UNLOCK (&mca_ptl_gm_component.gm_lock); - - /* release resources */ - if (NULL != proc->peer_arr) - free (proc->peer_arr); - - return; -} - -/* - * Create a GM process structure. There is a one-to-one correspondence - * between a ompi_proc_t and a mca_ptl_gm_proc_t instance. - * We cache additional data (specifically the list - * of mca_ptl_gm_peer_t instances, and publiched - * addresses) associated w/ a given destination on this datastructure. - */ - -mca_ptl_gm_proc_t * -mca_ptl_gm_proc_create (mca_ptl_gm_module_t * ptl, ompi_proc_t * ompi_proc) -{ - int rc; - size_t size; - mca_ptl_gm_proc_t *ptl_proc; - - ptl_proc = mca_ptl_gm_proc_lookup_ompi (ompi_proc); - if (ptl_proc != NULL) - { - return ptl_proc; - } - - - /* only gm ptl opened */ - ptl_proc = OBJ_NEW (mca_ptl_gm_proc_t); - ptl_proc->proc_ompi = ompi_proc; - - - /* Extract exposed addresses from remote proc */ - rc = mca_pml_base_modex_recv (&mca_ptl_gm_component.super.ptlm_version, - ompi_proc, (void **) &ptl_proc->proc_addrs, - &size); - - if (rc != OMPI_SUCCESS) { - opal_output (0, - "[%s:%d] mca_pml_base_modex_recv failed to recv data \n", - __FILE__, __LINE__); - OBJ_RELEASE (ptl_proc); - return NULL; - } - - if (0 != (size % sizeof (mca_ptl_gm_addr_t))) { - opal_output (0, "[%s:%d] invalid received data size %d\n", - __FILE__, __LINE__, size); - return NULL; - } - ptl_proc->proc_addr_count = size / sizeof (mca_ptl_gm_addr_t); - - /* allocate space for peer array - one for each exported address */ - ptl_proc->peer_arr = (mca_ptl_gm_peer_t **) - malloc (ptl_proc->proc_addr_count * sizeof (mca_ptl_gm_peer_t *)); - - if (NULL == ptl_proc->peer_arr) { - OBJ_RELEASE (ptl_proc); - opal_output (0, "[%s:%d] unable to allocate peer procs \n" - __FILE__, __LINE__); - return NULL; - } - - if(NULL == mca_ptl_gm_component.gm_local && - ompi_proc == ompi_proc_local() ) { - mca_ptl_gm_component.gm_local = ptl_proc; - } - - return 
ptl_proc; -} - -/* - * Look for an existing GM process instances based on the associated - * ompi_proc_t instance. - */ -static mca_ptl_gm_proc_t * -mca_ptl_gm_proc_lookup_ompi (ompi_proc_t * ompi_proc) -{ - mca_ptl_gm_proc_t *gm_proc; - - OPAL_THREAD_LOCK (&mca_ptl_gm_component.gm_lock); - - gm_proc = (mca_ptl_gm_proc_t *) - opal_list_get_first (&mca_ptl_gm_component.gm_procs); - - for (; gm_proc != (mca_ptl_gm_proc_t *) - opal_list_get_end (&mca_ptl_gm_component.gm_procs); - gm_proc = (mca_ptl_gm_proc_t *) opal_list_get_next (gm_proc)) { - if (gm_proc->proc_ompi == ompi_proc) { - OPAL_THREAD_UNLOCK (&mca_ptl_gm_component.gm_lock); - return gm_proc; - } - } - OPAL_THREAD_UNLOCK (&mca_ptl_gm_component.gm_lock); - - return NULL; -} diff --git a/ompi/mca/ptl/gm/ptl_gm_proc.h b/ompi/mca/ptl/gm/ptl_gm_proc.h deleted file mode 100644 index d1a22087b6..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_proc.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_GM_PROC_H -#define MCA_PTL_GM_PROC_H - -#include "opal/class/opal_object.h" -#include "orte/mca/ns/ns_types.h" -#include "ptl_gm.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -extern opal_class_t mca_ptl_gm_proc_t_class; - -struct mca_ptl_gm_proc_t { - opal_list_item_t super; /**< allow proc to be placed on a list */ - struct ompi_proc_t *proc_ompi; /**< pointer to corresponding orte_process_name_t */ - struct mca_ptl_gm_addr_t *proc_addrs; /**< array of addresses published by peer */ - opal_mutex_t proc_lock; /**< lock to protect against concurrent access to proc state */ - size_t proc_peer_count; - size_t proc_addr_count; - struct mca_ptl_gm_peer_t **peer_arr; - orte_process_name_t proc_guid; -}; -typedef struct mca_ptl_gm_proc_t mca_ptl_gm_proc_t; - - -mca_ptl_gm_proc_t *mca_ptl_gm_proc_create (mca_ptl_gm_module_t * ptl, - struct ompi_proc_t* orte_proc); -mca_ptl_gm_proc_t *mca_ptl_gm_proc_lookup (void *guid, size_t size); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/ompi/mca/ptl/gm/ptl_gm_regcache.c b/ompi/mca/ptl/gm/ptl_gm_regcache.c deleted file mode 100644 index ce814000cf..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_regcache.c +++ /dev/null @@ -1,534 +0,0 @@ -/************************************************************************* - * Myricom MPICH-GM ch_gm backend * - * Copyright (c) 2001 by Myricom, Inc. * - * All rights reserved. 
* - *************************************************************************/ -#include "ompi_config.h" -#include "ptl_gm.h" -#include "ptl_gm_priv.h" - -#if OMPI_MCA_PTL_GM_CACHE_ENABLE - -#include -#include -#include - -#define GM_DISABLE_REGISTRATION 0 -#define GMPI_ENABLE_REG_CACHE 1 - -#define GMPI_DEBUG_REG_CACHE_PRINT0(A) -#define GMPI_DEBUG_REG_CACHE_PRINT1(A,B,C) -#define GMPI_DEBUG_REG_CACHE_PRINT2(A,B,C,D,E) -#define GMPI_DEBUG_REGISTRATION_USE_SEGMENT(ADDR, LENGTH) -#define GMPI_DEBUG_DMA_MEMORY_USE(SIZE) -#define GMPI_DEBUG_DMA_MEMORY_ACQUIRE(ADDR, LENGTH) -#define GMPI_DEBUG_DMA_MEMORY_RELEASE(ADDR, LENGTH) -#define GMPI_DEBUG_DMA_MEMORY_UNUSE(SIZE) -#define GMPI_DEBUG_REGISTRATION_CLEAR_SEGMENT(ADDR, LENGTH) -#define GMPI_DEBUG_REGISTRATION_UNUSE_SEGMENT(ADDR, LENGTH) -#define GMPI_DEBUG_REGISTRATION_CLEAR_ALL_SEGMENTS() - -#define gmpi_debug_assert assert -#define gmpi_malloc_assert(A, B, C) assert( NULL != (A) ) -#define gmpi_abort(ID) abort() -#if GM_DISABLE_REGISTRATION - - -void -gmpi_clear_interval(gm_up_t start, unsigned int length) -{ - GMPI_DEBUG_REG_CACHE_PRINT2("Clear_interval", "start", - start, "length", length); -} - -void -gmpi_clear_all_intervals(void) -{ - GMPI_DEBUG_REG_CACHE_PRINT0("Clear_all_intervals"); -} - -#else - -#if GMPI_ENABLE_REG_CACHE - -typedef struct _entry -{ - gm_up_t addr; - struct _entry * prev; - struct _entry * next; - unsigned int refcount; -} regcache_entry; - - -static struct gm_hash * regcache_hash = NULL; -static struct gm_lookaside * regcache_lookaside = NULL; -static regcache_entry * regcache_head = NULL; -static regcache_entry * regcache_tail = NULL; - - -void -mca_ptl_gm_regcache_init(void) -{ - gmpi_debug_assert(GM_PAGE_LEN != 0); - if (regcache_hash == NULL) - { - regcache_hash = gm_create_hash(gm_hash_compare_ptrs, - gm_hash_hash_ptr, 0, 0, - 4096, 0); - regcache_lookaside = gm_create_lookaside(sizeof(regcache_entry), - 4096); - } - - gmpi_malloc_assert(regcache_hash, - "mca_ptl_gm_regcache_init", - 
"gm_create_hash: regcache page hash"); - gmpi_malloc_assert(regcache_lookaside, - "mca_ptl_gm_regcache_init", - "gm_create_lookaside: regcache entries list"); -} - - -static void -gmpi_regcache_deregister(struct gm_port *gmpi_gm_port, void * addr, unsigned int pages) -{ - if (pages > 0) - { - gm_deregister_memory(gmpi_gm_port, addr, GM_PAGE_LEN*pages); - GMPI_DEBUG_DMA_MEMORY_RELEASE((gm_up_t)addr,GM_PAGE_LEN*pages); - } -} - - -static void -gmpi_regcache_garbage_collector(struct gm_port *gmpi_gm_port, unsigned int required) -{ - regcache_entry * entry_ptr, * next_entry; - unsigned int count = 0; - gm_up_t batch_addr = 0; - unsigned int batch_pages = 0; - - GMPI_DEBUG_REG_CACHE_PRINT1("Garbage_collector start", "required", required); - entry_ptr = regcache_head; - while ((count < required) && (entry_ptr != NULL)) - { - if (entry_ptr->refcount == 0) - { - gm_hash_remove(regcache_hash, (void *)entry_ptr->addr); - if (batch_addr == 0) - { - batch_addr = entry_ptr->addr; - batch_pages++; - } - else - { - if (entry_ptr->addr == batch_addr+batch_pages*GM_PAGE_LEN) - { - batch_pages++; - } - else - { - gmpi_regcache_deregister(gmpi_gm_port, (void *)batch_addr, batch_pages); - batch_addr = entry_ptr->addr; - batch_pages = 1; - } - } - - count++; - next_entry = entry_ptr->next; - - if (regcache_head == entry_ptr) - regcache_head = next_entry; - else - entry_ptr->prev->next = next_entry; - - if (regcache_tail == entry_ptr) - regcache_tail = entry_ptr->prev; - else - entry_ptr->next->prev = entry_ptr->prev; - - gm_lookaside_free(entry_ptr); - entry_ptr = next_entry; - } - else - { - entry_ptr = entry_ptr->next; - } - } - - if (batch_addr) - { - gmpi_regcache_deregister(gmpi_gm_port, (void *)batch_addr, batch_pages); - } - GMPI_DEBUG_REG_CACHE_PRINT2("Garbage_collector stop", "required", - required, "count", count); -} - - -static unsigned int -gmpi_regcache_register(struct gm_port *gmpi_gm_port, void * addr, unsigned int pages) -{ - unsigned int i; - regcache_entry * 
entry_ptr; - gm_status_t status; - - GMPI_DEBUG_REG_CACHE_PRINT2("Regcache_register", "addr", - (gm_up_t)addr, "len", pages*GM_PAGE_LEN); - - if (gm_register_memory(gmpi_gm_port, addr, GM_PAGE_LEN*pages) != GM_SUCCESS) - { - GMPI_DEBUG_REG_CACHE_PRINT0("Regcache_register - using GC"); - gmpi_regcache_garbage_collector(gmpi_gm_port, 4096); - if (gm_register_memory(gmpi_gm_port, addr, GM_PAGE_LEN*pages) - != GM_SUCCESS) - { - GMPI_DEBUG_REG_CACHE_PRINT2("Register_memory failed", "start", - addr, "length", GM_PAGE_LEN*pages); - return 0; - } - } - - GMPI_DEBUG_DMA_MEMORY_ACQUIRE((gm_up_t)addr,GM_PAGE_LEN*pages); - - for (i=0; inext = entry_ptr; - } - - entry_ptr->prev = regcache_tail; - entry_ptr->next = NULL; - regcache_tail = entry_ptr; - entry_ptr->refcount = 1; - GMPI_DEBUG_DMA_MEMORY_USE(GM_PAGE_LEN); - entry_ptr->addr = (gm_up_t)addr + i*GM_PAGE_LEN; - - status = gm_hash_insert(regcache_hash, - (void *)(entry_ptr->addr), - (void *)(entry_ptr)); - if (status != GM_SUCCESS) - { - fprintf(stderr, "[%d]: gm_hash_insert failure in " - "gmpi_regcache_register: out of memory\n", 0 ); - gmpi_abort (0); - } - } - - return 1; -} - - -unsigned int -gmpi_use_interval(struct gm_port *gmpi_gm_port, gm_up_t start, unsigned int length) -{ - gm_up_t addr, end, batch_addr; - unsigned int batch_pages; - regcache_entry * entry_ptr; - - GMPI_DEBUG_REG_CACHE_PRINT2("Use_interval", "start", - start, "len", length); - if (length == 0) - { - return 0; - } - GMPI_DEBUG_REGISTRATION_USE_SEGMENT(start, length); - addr = start & ~(GM_PAGE_LEN-1); - end = start + length; - batch_addr = 0; - batch_pages = 0; - - while (addr < end) - { - entry_ptr = (regcache_entry *)gm_hash_find(regcache_hash, (void *)addr); - if (entry_ptr == NULL) - { - if (batch_addr == 0) - { - batch_addr = addr; - } - batch_pages++; - } - else - { - if (entry_ptr->refcount == 0) - { - GMPI_DEBUG_DMA_MEMORY_USE(GM_PAGE_LEN); - } - - entry_ptr->refcount++; - if (batch_addr != 0) - { - 
GMPI_DEBUG_REG_CACHE_PRINT2("Use_interval batch", "batch_addr", - batch_addr, "batch_pages", - batch_pages); - if (gmpi_regcache_register(gmpi_gm_port, (void *)batch_addr, batch_pages) == 0) - { - entry_ptr->refcount--; - - if (entry_ptr->refcount == 0) - { - GMPI_DEBUG_DMA_MEMORY_UNUSE(GM_PAGE_LEN); - } - - if (batch_addr > start) - { - return (batch_addr-start); - } - else - { - return 0; - } - } - - batch_addr = 0; - batch_pages = 0; - - /* move the entry to the end of the list (LRU policy) */ - if (entry_ptr != regcache_tail) - { - if (entry_ptr == regcache_head) - { - gmpi_debug_assert(entry_ptr->next != NULL); - entry_ptr->next->prev = NULL; - regcache_head = entry_ptr->next; - } - else - { - gmpi_debug_assert(entry_ptr->prev != NULL); - gmpi_debug_assert(entry_ptr->next != NULL); - entry_ptr->prev->next = entry_ptr->next; - entry_ptr->next->prev = entry_ptr->prev; - } - - entry_ptr->next = NULL; - entry_ptr->prev = regcache_tail; - regcache_tail->next = entry_ptr; - regcache_tail = entry_ptr; - } - } - } - addr += GM_PAGE_LEN; - } - - if (batch_addr != 0) - { - if (gmpi_regcache_register(gmpi_gm_port, (void *)batch_addr, batch_pages) == 0) - { - if (batch_addr > start) - { - return (batch_addr-start); - } - else - { - return 0; - } - } - } - - return length; -} - - -gm_status_t -gmpi_unuse_interval(struct gm_port *gmpi_gm_port, gm_up_t start, unsigned int length) -{ - gm_up_t addr, end; - regcache_entry * entry_ptr; - - GMPI_DEBUG_REG_CACHE_PRINT2("Unuse_interval", "start", - start, "length", length); - if (length == 0) - { - return GM_SUCCESS; - } - GMPI_DEBUG_REGISTRATION_UNUSE_SEGMENT(start, length); - - addr = start & ~(GM_PAGE_LEN-1); - end = start + length; - - while (addr < end) - { - entry_ptr = (regcache_entry *)gm_hash_find(regcache_hash, (void *)addr); - - gmpi_debug_assert(entry_ptr != NULL); - gmpi_debug_assert(entry_ptr->refcount > 0); - - entry_ptr->refcount--; - if (entry_ptr->refcount == 0) - { - GMPI_DEBUG_DMA_MEMORY_UNUSE(GM_PAGE_LEN); - } 
- addr += GM_PAGE_LEN; - } - return GM_SUCCESS; -} - - -void -gmpi_clear_interval(struct gm_port *gmpi_gm_port, gm_up_t start, unsigned int length) -{ - gm_up_t addr, end, batch_addr; - unsigned int batch_pages; - regcache_entry * entry_ptr; - - GMPI_DEBUG_REG_CACHE_PRINT2("Clear_interval", "start", - start, "length", length); - GMPI_DEBUG_REGISTRATION_CLEAR_SEGMENT(start, length); - - if (regcache_hash != NULL) - { - addr = start & ~(GM_PAGE_LEN-1); - end = start + length; - batch_addr = 0; - batch_pages = 0; - - while (addr < end) - { - entry_ptr = (regcache_entry *)gm_hash_find(regcache_hash, - (void *)addr); - if (entry_ptr != NULL) - { - gmpi_debug_assert(entry_ptr->refcount == 0); - if (entry_ptr->refcount > 0) - { - GMPI_DEBUG_DMA_MEMORY_UNUSE(GM_PAGE_LEN); - } - gm_hash_remove(regcache_hash, (void *)addr); - - if (batch_addr == 0) - batch_addr = addr; - batch_pages++; - - if (regcache_head == entry_ptr) - regcache_head = entry_ptr->next; - else - entry_ptr->prev->next = entry_ptr->next; - - if (regcache_tail == entry_ptr) - regcache_tail = entry_ptr->prev; - else - entry_ptr->next->prev = entry_ptr->prev; - - gm_lookaside_free(entry_ptr); - } - else - { - if (batch_addr != 0) - { - gmpi_regcache_deregister(gmpi_gm_port, (void *)batch_addr, batch_pages); - batch_addr = 0; - batch_pages = 0; - } - } - addr += GM_PAGE_LEN; - } - - if (batch_addr != 0) - gmpi_regcache_deregister(gmpi_gm_port, (void *)batch_addr, batch_pages); - } -} - - -void -gmpi_clear_all_intervals(void) -{ - struct gm_hash *old_regcache_hash; - - GMPI_DEBUG_REG_CACHE_PRINT0("Clear_all_intervals"); - GMPI_DEBUG_REGISTRATION_CLEAR_ALL_SEGMENTS(); - - if (regcache_hash != NULL) - { - old_regcache_hash = regcache_hash; - regcache_hash = NULL; - gm_destroy_hash (old_regcache_hash); - gm_destroy_lookaside (regcache_lookaside); - } -} - - -#else /* NO_REG_CACHE */ - - -unsigned int -gmpi_use_interval(struct gm_port *gmpi_gm_port, gm_up_t start, unsigned int length) -{ - gm_status_c cc; - - 
GMPI_DEBUG_REG_CACHE_PRINT2("Use_interval", "start", - start, "len", length); - if (length == 0) - { - return 0; - } - GMPI_DEBUG_REGISTRATION_USE_SEGMENT(start, length); - - if ((cc = gm_register_memory(gmpi_gm_port, (void*)start, length)) != GM_SUCCESS) - { - GMPI_DEBUG_REG_CACHE_PRINT2("Use_interval no_regcache: register failed", - "start", start, "len", length); - return 0; - } - - GMPI_DEBUG_DMA_MEMORY_USE(length); - GMPI_DEBUG_DMA_MEMORY_ACQUIRE(start,length); - return length; -} - -gm_status_t -gmpi_unuse_interval(struct gm_port *gmpi_gm_port, gm_up_t start, unsigned int length) -{ - gm_status_t cc; - - GMPI_DEBUG_REG_CACHE_PRINT2("Unuse_interval", "start", - start, "length", length); - if (length == 0) - { - return; - } - GMPI_DEBUG_REGISTRATION_UNUSE_SEGMENT(start, length); - - cc = gm_deregister_memory(gmpi_gm_port, (void*)start, length); - gmpi_debug_assert(cc == GM_SUCCESS); - - GMPI_DEBUG_DMA_MEMORY_UNUSE(length); - GMPI_DEBUG_DMA_MEMORY_RELEASE(start,length); - return cc; -} - - -void -mca_ptl_gm_regcache_init(void) -{ - ; -} - -void -gmpi_clear_interval(gm_up_t start, unsigned int length) -{ - GMPI_DEBUG_REG_CACHE_PRINT2("Clear_interval", "start", - start, "length", length); - GMPI_DEBUG_REGISTRATION_CLEAR_SEGMENT(start, length); -} - -void -gmpi_clear_all_intervals(void) -{ - GMPI_DEBUG_REG_CACHE_PRINT0("Clear_all_intervals"); - GMPI_DEBUG_REGISTRATION_CLEAR_ALL_SEGMENTS(); -} - -#endif - -#endif - -#endif /* OMPI_MCA_PTL_GM_CACHE_ENABLE */ diff --git a/ompi/mca/ptl/gm/ptl_gm_sendfrag.c b/ompi/mca/ptl/gm/ptl_gm_sendfrag.c deleted file mode 100644 index a8811e50f1..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_sendfrag.c +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "ompi/types.h" -#include "ompi/datatype/datatype.h" -#include "ptl_gm.h" -#include "ptl_gm_sendfrag.h" -#include "ptl_gm_priv.h" - -/* - * send fragment constructor/destructors. - */ - -OBJ_CLASS_INSTANCE(mca_ptl_gm_send_frag_t, - mca_ptl_base_send_frag_t, - NULL, NULL); - -/* It's not yet clear for me what's the best solution here. Block - * until we get a free request or allocate a new one. The fist case - * allow us to never take care of the gm allocated DMA buffer as all - * send fragments already have one attached, but it can stop the - * application progression. The second case require special cases: we - * should set the data in the header inside the fragment and later - * when we get some free fragments with DMA memory attached we should - * put the header back there, and send it. - * - * I will implement the first case and add the second one in my TODO - * list. 
- */ -mca_ptl_gm_send_frag_t* -mca_ptl_gm_alloc_send_frag( struct mca_ptl_gm_module_t* ptl, - struct mca_ptl_base_send_request_t* sendreq ) -{ - opal_list_item_t* item; - mca_ptl_gm_send_frag_t* frag; - int32_t rc; - - /* first get a gm_send_frag */ - OMPI_FREE_LIST_GET( &(ptl->gm_send_frags), item, rc ); - frag = (mca_ptl_gm_send_frag_t*)item; - /* And then get some DMA memory to put the data */ - OMPI_FREE_LIST_WAIT( &(ptl->gm_send_dma_frags), item, rc ); - opal_atomic_sub( &(ptl->num_send_tokens), 1 ); - assert( ptl->num_send_tokens >= 0 ); - frag->send_buf = (void*)item; - - frag->frag_send.frag_request = sendreq; - frag->frag_send.frag_base.frag_owner = (struct mca_ptl_base_module_t*)ptl; - frag->frag_send.frag_base.frag_addr = sendreq->req_send.req_addr; - frag->frag_bytes_processed = 0; - frag->frag_bytes_validated = 0; - frag->status = -1; - frag->type = PUT; - ompi_ptl_gm_init_pipeline( &(frag->pipeline) ); - - return frag; -} - -int mca_ptl_gm_put_frag_init( struct mca_ptl_gm_send_frag_t** putfrag, - struct mca_ptl_gm_peer_t* ptl_peer, - struct mca_ptl_gm_module_t* gm_ptl, - struct mca_ptl_base_send_request_t* sendreq, - size_t offset, size_t* size, int flags ) -{ - ompi_convertor_t* convertor; - mca_ptl_gm_send_frag_t* frag; - - frag = mca_ptl_gm_alloc_send_frag( gm_ptl, sendreq ); /*alloc_put_frag */ - - frag->frag_send.frag_base.frag_peer = (struct mca_ptl_base_peer_t*)ptl_peer; - frag->frag_send.frag_base.frag_size = *size; - frag->frag_offset = offset; - - if( (*size) > 0 ) { - convertor = &(frag->frag_send.frag_base.frag_convertor); - /* GM use the default parameters for the convertor without any special memory - * allocation function. We have to call the prepare_for_send in order to - * initialize the missing parameters of the convertor. - */ - ompi_convertor_clone_with_position( &(sendreq->req_send.req_convertor), convertor, 1, - &offset ); - } - *putfrag = frag; - - return OMPI_SUCCESS; -} - -/* - * recv fragment constructor/destructors. 
- */ - -OBJ_CLASS_INSTANCE(mca_ptl_gm_recv_frag_t, - mca_ptl_base_recv_frag_t, - NULL, NULL); diff --git a/ompi/mca/ptl/gm/ptl_gm_sendfrag.h b/ompi/mca/ptl/gm/ptl_gm_sendfrag.h deleted file mode 100644 index 3d95be5194..0000000000 --- a/ompi/mca/ptl/gm/ptl_gm_sendfrag.h +++ /dev/null @@ -1,219 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ - -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004 The Ohio State University. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_GM_SEND_FRAG_H -#define MCA_PTL_GM_SEND_FRAG_H - -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/communicator/communicator.h" - -#define MATCH 0 -#define FRAG 1 -#define ACK 2 -#define PUT 3 - -/* depth of the GM internal pipeline */ -#define GM_PIPELINE_DEPTH 3 - -#define PTL_GM_PIPELINE_EMPTY 0x0000 -#define PTL_GM_PIPELINE_DEREGISTER 0x0001 -#define PTL_GM_PIPELINE_REGISTER 0x0002 -#define PTL_GM_PIPELINE_REMOTE 0x0004 -#define PTL_GM_PIPELINE_TRANSFERT (PTL_GM_PIPELINE_REGISTER | PTL_GM_PIPELINE_REMOTE) -#define PTL_GM_PIPELINE_HAS_INTERNAL_BUFFERS 0x0008 - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - OBJ_CLASS_DECLARATION (mca_ptl_gm_send_frag_t); - OBJ_CLASS_DECLARATION (mca_ptl_gm_recv_frag_t); - - /* specific header for GM rendezvous protocol. 
It will be filled up by the sender - * and should be able to hold a pointer to the last registered memory location. - */ - struct mca_ptl_gm_frag_header_t { - mca_ptl_base_frag_header_t hdr_frag; - ompi_ptr_t registered_memory; - }; - typedef struct mca_ptl_gm_frag_header_t mca_ptl_gm_frag_header_t; - - struct mca_ptl_gm_pipeline_line_t { - uint16_t flags; - uint16_t hdr_flags; - uint64_t length; - uint64_t offset; - ompi_ptr_t local_memory; - ompi_ptr_t remote_memory; - }; - typedef struct mca_ptl_gm_pipeline_line_t mca_ptl_gm_pipeline_line_t; - - struct mca_ptl_gm_pipeline_info_t { - mca_ptl_gm_pipeline_line_t lines[GM_PIPELINE_DEPTH]; - uint32_t pos_register; - uint32_t pos_remote; - uint32_t pos_deregister; - uint32_t pos_transfert; - }; - typedef struct mca_ptl_gm_pipeline_info_t mca_ptl_gm_pipeline_info_t; - - struct mca_ptl_gm_peer_t; - - /** - * GM send fragment derived type. - */ - struct mca_ptl_gm_send_frag_t { - mca_ptl_base_send_frag_t frag_send; /**< base send fragment descriptor */ - void* send_buf; - ompi_ptr_t* registered_buf; - - uint64_t frag_bytes_processed; /**< data sended so far */ - uint64_t frag_bytes_validated; /**< amount of data for which we receive an ack */ - uint64_t frag_offset; /**< initial offset of the fragment as specified by the upper level */ - mca_ptl_gm_pipeline_info_t pipeline; /**< storing the information about the status - * of the pipeline for long messages. */ - int status; - uint32_t type; - }; - typedef struct mca_ptl_gm_send_frag_t mca_ptl_gm_send_frag_t; - - struct mca_ptl_gm_recv_frag_t { - mca_ptl_base_recv_frag_t frag_recv; - uint64_t frag_bytes_processed; - uint64_t frag_bytes_validated; /**< amount of data for which we receive an ack */ - uint64_t frag_offset; - mca_ptl_gm_pipeline_info_t pipeline; /**< storing the information about the status of - * the pipeline for long messages. 
*/ - uint32_t type; - bool matched; - bool have_allocated_buffer; - uint32_t attached_data_length; - }; - typedef struct mca_ptl_gm_recv_frag_t mca_ptl_gm_recv_frag_t; - - mca_ptl_gm_send_frag_t * - mca_ptl_gm_alloc_send_frag( struct mca_ptl_gm_module_t* ptl, - struct mca_ptl_base_send_request_t* sendreq ); - - int - mca_ptl_gm_put_frag_init( struct mca_ptl_gm_send_frag_t** sendfrag, - struct mca_ptl_gm_peer_t * ptl_peer, - struct mca_ptl_gm_module_t *ptl, - struct mca_ptl_base_send_request_t * sendreq, - size_t offset, - size_t* size, - int flags ); - -#define OMPI_FREE_LIST_TRY_GET(fl, item) \ -{ \ - item = NULL; \ - if(opal_using_threads()) { \ - if( opal_mutex_trylock( &((fl)->fl_lock)) ) { \ - /* We get the lock. Now let's remove one of the elements */ \ - item = opal_list_remove_first(&((fl)->super)); \ - opal_mutex_unlock(&((fl)->fl_lock)); \ - } \ - } else { \ - item = opal_list_remove_first(&((fl)->super)); \ - } \ -} - - static inline int - mca_ptl_gm_init_header_rndv( mca_ptl_base_header_t *hdr, - struct mca_ptl_base_send_request_t * sendreq, - int flags ) - { - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_RNDV; - - hdr->hdr_rndv.hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_rndv.hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_rndv.hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; - hdr->hdr_rndv.hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_rndv.hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_src_ptr.lval = 0L; - hdr->hdr_rndv.hdr_src_ptr.pval = sendreq; - - return OMPI_SUCCESS; - } - - static inline int - mca_ptl_gm_init_header_frag( struct mca_ptl_gm_send_frag_t* sendfrag, - struct mca_ptl_gm_peer_t * ptl_peer, - struct mca_ptl_base_send_request_t * sendreq, - size_t offset, - size_t* size, - int flags ) 
- - { - mca_ptl_base_header_t *hdr = (mca_ptl_base_header_t *)sendfrag->send_buf; - - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; - hdr->hdr_frag.hdr_frag_offset = offset; - hdr->hdr_frag.hdr_frag_length = *size; - hdr->hdr_frag.hdr_src_ptr.lval = 0; - hdr->hdr_frag.hdr_src_ptr.pval = sendfrag; /* pointer to the frag */ - hdr->hdr_frag.hdr_dst_ptr = sendreq->req_peer_match; - sendfrag->type = FRAG; - - return OMPI_SUCCESS; - } - - static inline void ompi_ptl_gm_init_pipeline( mca_ptl_gm_pipeline_info_t* pipeline ) - { - int i; - - pipeline->pos_register = 0; - pipeline->pos_remote = 0; - pipeline->pos_deregister = 0; - pipeline->pos_transfert = 0; - for( i = 0; i < GM_PIPELINE_DEPTH; i++ ) - pipeline->lines[i].flags = 0; - } - - static inline mca_ptl_gm_recv_frag_t* - mca_ptl_gm_alloc_recv_frag( struct mca_ptl_base_module_t *ptl ) - { - int rc; - opal_list_item_t* item; - mca_ptl_gm_recv_frag_t* frag; - - OMPI_FREE_LIST_GET( &(((mca_ptl_gm_module_t *)ptl)->gm_recv_frags_free), item, rc ); - - frag = (mca_ptl_gm_recv_frag_t*)item; - frag->frag_recv.frag_base.frag_owner = (struct mca_ptl_base_module_t*)ptl; - frag->frag_bytes_processed = 0; - frag->frag_bytes_validated = 0; - frag->frag_offset = 0; - ompi_ptl_gm_init_pipeline( &(frag->pipeline) ); - return frag; - } - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/ompi/mca/ptl/mx/Makefile.am b/ompi/mca/ptl/mx/Makefile.am deleted file mode 100644 index 923f805ecb..0000000000 --- a/ompi/mca/ptl/mx/Makefile.am +++ /dev/null @@ -1,54 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. 
-# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - - - -AM_CPPFLAGS = $(ptl_mx_CPPFLAGS) - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if OMPI_BUILD_ptl_mx_DSO -component_noinst = -component_install = mca_ptl_mx.la -else -component_noinst = libmca_ptl_mx.la -component_install = -endif - -ptl_mx_SOURCES = ptl_mx.c ptl_mx.h ptl_mx_component.c ptl_mx_module.c ptl_mx_module.h \ - ptl_mx_peer.c ptl_mx_peer.h ptl_mx_proc.c ptl_mx_proc.h \ - ptl_mx_recvfrag.c ptl_mx_recvfrag.h ptl_mx_sendfrag.c ptl_mx_sendfrag.h - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) -mca_ptl_mx_la_SOURCES = $(ptl_mx_SOURCES) -mca_ptl_mx_la_LIBADD = \ - $(ptl_mx_LIBS) \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la -mca_ptl_mx_la_LDFLAGS = -module -avoid-version $(ptl_mx_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_ptl_mx_la_SOURCES = $(ptl_mx_SOURCES) -libmca_ptl_mx_la_LIBADD = $(ptl_mx_LIBS) -libmca_ptl_mx_la_LDFLAGS = -module -avoid-version $(ptl_mx_LDFLAGS) diff --git a/ompi/mca/ptl/mx/configure.m4 b/ompi/mca/ptl/mx/configure.m4 deleted file mode 100644 index 1f4a550f79..0000000000 --- a/ompi/mca/ptl/mx/configure.m4 +++ /dev/null @@ -1,69 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. 
-# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -# MCA_ptl_mx_CONFIG([action-if-can-compile], -# [action-if-cant-compile]) -# ------------------------------------------------ -AC_DEFUN([MCA_ptl_mx_CONFIG],[ - OMPI_CHECK_MX([ptl_mx], - [ptl_mx_happy="yes"], - [ptl_mx_happy="no"]) - - if [ test "$ptl_mx_happy" = "yes" ]; then - # - # Save a copy of the flags - # - ompi_check_mx_callback_CPPFLAGS="$CPPFLAGS" - ompi_check_mx_callback_LDFLAGS="$LDFLAGS" - ompi_check_mx_callback_LIBS="$LIBS" - # - # Set the value allowing MX compilation - # - CPPFLAGS="$CPPFLAGS $ptl_mx_CPPFLAGS" - LDFLAGS="$LDFLAGS $ptl_mx_LDFLAGS" - LIBS="$LIBS $ptl_mx_LIBS" - - AC_MSG_CHECKING([for a MX version with mx_register_match_callback]) - AC_TRY_COMPILE([#include ], - [mx_register_match_callback(0, 0, 0);], - [ptl_mx_happy="yes"], - [ptl_mx_happy="no"]) - AC_MSG_RESULT([$ptl_mx_happy]) - # - # Restore the original flags - # - CPPFLAGS="$ompi_check_mx_callback_CPPFLAGS" - LDFLAGS="$ompi_check_mx_callback_LDFLAGS" - LIBS="$ompi_check_mx_callback_LIBS" - fi - - AS_IF([test "$ptl_mx_happy" = "yes"], - [ptl_mx_WRAPPER_EXTRA_LDFLAGS="$ptl_mx_LDFLAGS" - ptl_mx_WRAPPER_EXTRA_LIBS="$ptl_mx_LIBS" - $1], - [$2]) - - # substitute in the things needed to build mx - AC_SUBST([ptl_mx_CFLAGS]) - AC_SUBST([ptl_mx_CPPFLAGS]) - AC_SUBST([ptl_mx_LDFLAGS]) - AC_SUBST([ptl_mx_LIBS]) -])dnl - diff --git a/ompi/mca/ptl/mx/configure.params b/ompi/mca/ptl/mx/configure.params deleted file mode 100644 index 9e7e3d2043..0000000000 --- a/ompi/mca/ptl/mx/configure.params +++ /dev/null @@ -1,23 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. 
All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=ptl_mx.c -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/ptl/mx/ptl_mx.c b/ompi/mca/ptl/mx/ptl_mx.c deleted file mode 100644 index 0bc0c31fa7..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx.c +++ /dev/null @@ -1,499 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include -#include -#include - -#include "ompi/constants.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/communicator/communicator.h" -#include "ptl_mx.h" -#include "ptl_mx_peer.h" -#include "ptl_mx_sendfrag.h" -#include "ptl_mx_recvfrag.h" - - -mca_ptl_mx_module_t mca_ptl_mx_module = { - { - &mca_ptl_mx_component.super, - 16, /* ptl_cache_size */ - sizeof(mca_ptl_mx_send_frag_t), /* ptl_cache_bytes */ - (32 * 1024) - sizeof(mca_ptl_base_header_t), /* ptl_frag_first_size */ - 0, /* ptl_frag_min_size */ - -1, /* ptl_frag_max_size */ - 0, /* ptl_exclusivity */ - 0, /* ptl_latency */ - 0, /* ptl_bandwidth */ - MCA_PTL_PUT, /* ptl flags */ - mca_ptl_mx_add_procs, - mca_ptl_mx_del_procs, - mca_ptl_mx_finalize, - mca_ptl_mx_send, - mca_ptl_mx_send_continue, - NULL, /* get */ - mca_ptl_mx_matched, /* matched */ - mca_ptl_mx_request_init, - mca_ptl_mx_request_fini, - NULL, /* match */ - NULL, - NULL - } -}; - - - -/** - * Allocate memory for use by the convert. - */ - -static void *mca_ptl_mx_alloc( size_t *size, void* user ) -{ - return malloc(*size); -} - -/** - * PML->PTL Initialize a send request for use by the PTL. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - * To reduce latency (number of required allocations), the PML allocates up - * to ptl_cache_bytes of additional space contigous w/ the base send request. - * This space may be used by the PTL for additional control information (e.g. - * first fragment descriptor). - * - * The ptl_request_init() function is called by the PML when requests are - * allocated to the PTLs cache. These requests will be cached by the PML - * on completion and re-used by the same PTL w/out additional calls to - * ptl_request_init(). 
- * - * If the cache size is exceeded, the PML may pass requests to ptl_send/ptl_put - * that have been taken from the global pool and have not been initialized by the - * PTL. These requests will have the req_cached attribute set to false. - * - */ - -int mca_ptl_mx_request_init(struct mca_ptl_base_module_t* ptl, mca_ptl_base_send_request_t* request) -{ - OBJ_CONSTRUCT(request+1, mca_ptl_mx_send_frag_t); - return OMPI_SUCCESS; -} - - -/** - * PML->PTL Cleanup any resources that may have been associated with the - * request by the PTL. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - * The ptl_request_fini function is called when the PML removes a request - * from the PTLs cache (due to resource constraints). This routine provides - * the PTL the chance to cleanup/release any resources cached on the send - * descriptor by the PTL. - */ - -void mca_ptl_mx_request_fini(struct mca_ptl_base_module_t* ptl, mca_ptl_base_send_request_t* request) -{ - OBJ_DESTRUCT(request+1); -} - - -/** - * PML->PTL Initiate a send to the peer. - * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param request (IN) Send request - * @param offset Current offset into packed/contiguous buffer. - * @param size (IN) Number of bytes PML is requesting PTL to deliver, - * @param flags (IN) Flags that should be passed to the peer via the message header. - * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - * - * The PML implements a rendevouz protocol, with up to the PTL threshold - * (ptl_first_frag_size) bytes of the message sent in eager send mode. The ptl_send() - * function is called by the PML to initiate the send of the first message fragment. - * - * The PTL is responsible for updating the current data offset (req_offset) in the - * request to reflect the actual number of bytes fragmented. 
This may be less than - * the requested size, due to resource constraints or datatype alighnment/offset. If - * an acknowledgment is required, the MCA_PTL_FLAGS_ACK bit will be set in the - * flags parameter. In this case, the PTL should not call ptl_send_progress() function - * to indicate completion of the fragment until the ack is received. For all other - * fragments ptl_send_progress() may be called based on local completion semantics. - */ - -int mca_ptl_mx_send( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t* sendreq, - size_t offset, - size_t size, - int flags) -{ - mca_ptl_mx_module_t* mx_ptl = (mca_ptl_mx_module_t*)ptl; - mca_ptl_mx_send_frag_t* sendfrag; - mca_ptl_base_header_t* hdr; - mx_segment_t *segments; - mx_return_t mx_return; - ompi_ptr_t match; - int rc; - - if (sendreq->req_cached) { - sendfrag = (mca_ptl_mx_send_frag_t*)(sendreq+1); - } else { - opal_list_item_t* item; - OMPI_FREE_LIST_GET(&mca_ptl_mx_component.mx_send_frags, item, rc); - if(NULL == (sendfrag = (mca_ptl_mx_send_frag_t*)item)) - return rc; - } - - /* setup iovec */ - sendfrag->frag_progress = 0; - sendfrag->frag_free = 0; - - /* initialize convertor */ - if(size > 0) { - ompi_convertor_t *convertor; - struct iovec iov; - uint32_t iov_count; - size_t max_data; - int rc; - - convertor = &sendreq->req_send.req_convertor; - ompi_convertor_personalize( convertor, 0, &offset, mca_ptl_mx_alloc, NULL ); - - /* if data is contigous convertor will return an offset - * into users buffer - otherwise will return an allocated buffer - * that holds the packed data - */ - iov.iov_base = NULL; - iov.iov_len = size; - iov_count = 1; - max_data = size; - if((rc = ompi_convertor_pack( convertor, - &iov, - &iov_count, - &max_data, - &(sendfrag->frag_free))) < 0) { - return OMPI_ERROR; - } - sendfrag->frag_segments[1].segment_ptr = iov.iov_base; - sendfrag->frag_segments[1].segment_length = iov.iov_len; - 
sendfrag->frag_send.frag_base.frag_addr = iov.iov_base; - sendfrag->frag_send.frag_base.frag_size = iov.iov_len; - } else { - sendfrag->frag_send.frag_base.frag_addr = NULL; - sendfrag->frag_send.frag_base.frag_size = 0; - } - - /* setup message header */ - hdr = &sendfrag->frag_send.frag_base.frag_header; - - /* first fragment - need to try and match at the receiver */ - if(offset == 0) { - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; - - /* for the first 32K - send header for matching + data */ - segments = sendfrag->frag_segments; - if(sendfrag->frag_send.frag_base.frag_size > 0) { - sendfrag->frag_segment_count = 2; - } else { - sendfrag->frag_segment_count = 1; - } - - /* if an acknoweldgment is not required - can get by with a shorter header */ - if((flags & MCA_PTL_FLAGS_ACK) == 0) { - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH; - sendfrag->frag_segments[0].segment_length = sizeof(mca_ptl_base_match_header_t); - match.lval = MCA_PTL_HDR_TYPE_MATCH; - - /* convert header to network byte order if required */ - if(ptl_peer->peer_nbo) { - hdr->hdr_common.hdr_flags |= MCA_PTL_FLAGS_NBO; - MCA_PTL_BASE_MATCH_HDR_HTON(hdr->hdr_match); - } - } else { - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_RNDV; - hdr->hdr_rndv.hdr_frag_length = sendfrag->frag_send.frag_base.frag_size; - hdr->hdr_rndv.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_rndv.hdr_src_ptr.pval = sendfrag; - sendfrag->frag_segments[0].segment_length = sizeof(mca_ptl_base_rendezvous_header_t); - match.lval = MCA_PTL_HDR_TYPE_RNDV; - - /* convert header to 
network byte order if required */ - if(ptl_peer->peer_nbo) { - hdr->hdr_common.hdr_flags |= MCA_PTL_FLAGS_NBO; - MCA_PTL_BASE_RNDV_HDR_HTON(hdr->hdr_rndv); - } - } - - /* non-zero offset - fragment of a previously started message */ - } else { - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_frag.hdr_frag_offset = offset; - hdr->hdr_frag.hdr_frag_length = sendfrag->frag_send.frag_base.frag_size; - hdr->hdr_frag.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_frag.hdr_src_ptr.pval = sendfrag; - hdr->hdr_frag.hdr_dst_ptr = sendreq->req_peer_match; - match.sval.uval = sendreq->req_peer_match.ival; - match.sval.lval = offset; - - /* dont send a header for after the first 32K - MX currently doesn't - * support DMA of more than one segment. - */ - segments = sendfrag->frag_segments+1; - sendfrag->frag_segment_count = 1; - - /* convert header to network byte order if required */ - if(ptl_peer->peer_nbo) { - hdr->hdr_common.hdr_flags |= MCA_PTL_FLAGS_NBO; - MCA_PTL_BASE_FRAG_HDR_HTON(hdr->hdr_frag); - } - } - - /* fragment state */ - sendfrag->frag_send.frag_base.frag_owner = &ptl_peer->peer_ptl->super; - sendfrag->frag_send.frag_request = sendreq; - sendfrag->frag_send.frag_base.frag_peer = ptl_peer; - - /* must update the offset after actual fragment size is determined - * before attempting to send the fragment - */ - mca_ptl_base_send_request_offset(sendreq, - sendfrag->frag_send.frag_base.frag_size); - - /* start the fragment */ - mx_return = mx_isend( mx_ptl->mx_endpoint, - segments, - sendfrag->frag_segment_count, - ptl_peer->peer_addr, - match.lval, - sendfrag, - &sendfrag->frag_request ); - if(mx_return != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_send: mx_isend() failed with return value=%d\n", mx_return); - return OMPI_ERROR; - } - return OMPI_SUCCESS; -} - - -/** - * PML->PTL Initiate a send to the peer. 
- * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param request (IN) Send request - * @param offset Current offset into packed/contiguous buffer. - * @param size (IN) Number of bytes PML is requesting PTL to deliver, - * @param flags (IN) Flags that should be passed to the peer via the message header. - * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - * - * Continue sending fragments of a large message to the peer. - */ - -int mca_ptl_mx_send_continue( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t* sendreq, - size_t offset, - size_t size, - int flags) -{ - mca_ptl_mx_module_t* mx_ptl = (mca_ptl_mx_module_t*)ptl; - mca_ptl_mx_send_frag_t* sendfrag; - mx_return_t mx_return; - ompi_ptr_t match; - ompi_convertor_t *convertor; - struct iovec iov; - uint32_t iov_count; - size_t max_data; - int rc; - - /* allocate fragment */ - MCA_PTL_MX_SEND_FRAG_ALLOC(sendfrag, rc); - if(rc != OMPI_SUCCESS) { - return rc; - } - sendfrag->frag_free = 0; - - /* initialize convertor */ - convertor = &sendfrag->frag_send.frag_base.frag_convertor; - ompi_convertor_clone( &(sendreq->req_send.req_convertor), convertor, 1 ); - ompi_convertor_personalize( convertor, 0, &offset, mca_ptl_mx_alloc, NULL ); - - /* if data is contigous convertor will return an offset - * into users buffer - otherwise will return an allocated buffer - * that holds the packed data - */ - iov.iov_base = NULL; - iov.iov_len = size; - iov_count = 1; - max_data = size; - if((rc = ompi_convertor_pack( convertor, - &iov, - &iov_count, - &max_data, - &(sendfrag->frag_free))) < 0) { - return OMPI_ERROR; - } - - sendfrag->frag_segments[0].segment_length = sizeof(mca_ptl_base_frag_header_t); - sendfrag->frag_segments[1].segment_ptr = iov.iov_base; - sendfrag->frag_segments[1].segment_length = iov.iov_len; - sendfrag->frag_segment_count = 1; - 
sendfrag->frag_send.frag_base.frag_addr = iov.iov_base; - sendfrag->frag_send.frag_base.frag_header.hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; - sendfrag->frag_send.frag_base.frag_header.hdr_common.hdr_flags = 0; - sendfrag->frag_send.frag_base.frag_header.hdr_frag.hdr_frag_length = iov.iov_len; - sendfrag->frag_send.frag_base.frag_header.hdr_frag.hdr_frag_offset = offset; - - /* fragment state */ - sendfrag->frag_send.frag_base.frag_owner = &ptl_peer->peer_ptl->super; - sendfrag->frag_send.frag_request = sendreq; - sendfrag->frag_send.frag_base.frag_size = size; - sendfrag->frag_send.frag_base.frag_peer = ptl_peer; - sendfrag->frag_progress = 0; - - /* must update the offset after actual fragment size is determined - * before attempting to send the fragment - */ - mca_ptl_base_send_request_offset(sendreq, size); - - /* start the fragment */ - match.sval.uval = sendreq->req_peer_match.ival; - match.sval.lval = offset; - mx_return = mx_isend( - mx_ptl->mx_endpoint, - sendfrag->frag_segments+1, - 1, - sendfrag->frag_send.frag_base.frag_peer->peer_addr, - match.lval, - sendfrag, - &sendfrag->frag_request); - if(mx_return != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_send: mx_isend() failed with return value=%d\n", mx_return); - return OMPI_ERROR; - } - return OMPI_SUCCESS; -} - -/** - * PML->PTL Notification from the PML to the PTL that a receive - * has been posted and matched against the indicated fragment. - * - * @param ptl (IN) PTL instance - * @param recv_frag Matched fragment - * - * The ptl_matched() function is called by the PML when a fragment - * is matched to a posted receive. This may occur during a call to - * ptl_match() if the receive is matched, or at a later point in time - * when a matching receive is posted. - * - * When this routine is called, the PTL is responsible for generating - * an acknowledgment to the peer if the MCA_PTL_FLAGS_ACK - * bit is set in the original fragment header. 
Additionally, the PTL - * is responsible for transferring any data associated with the fragment - * into the users buffer utilizing the datatype engine, and notifying - * the PML that the fragment has completed via the ptl_recv_progress() - * function. - */ - -void mca_ptl_mx_matched( - mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag) -{ - mca_ptl_base_header_t* hdr = &frag->frag_base.frag_header; - mca_ptl_base_recv_request_t* request = frag->frag_request; - mca_ptl_mx_module_t* mx_ptl = (mca_ptl_mx_module_t*)ptl; - mca_ptl_mx_recv_frag_t* mx_frag = (mca_ptl_mx_recv_frag_t*)frag; - size_t bytes_delivered = mx_frag->frag_size; - bool ack_pending = false; - - /* generate an acknowledgment if required */ - if(hdr->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK) { - int rc; - mca_ptl_mx_send_frag_t* ack; - MCA_PTL_MX_SEND_FRAG_ALLOC(ack, rc); - if(NULL == ack) { - OPAL_THREAD_LOCK(&mca_ptl_mx_component.mx_lock); - ack_pending = true; - opal_list_append(&mca_ptl_mx_component.mx_pending_acks, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_ptl_mx_component.mx_lock); - } else { - mx_return_t mx_return; - MCA_PTL_MX_SEND_FRAG_INIT_ACK(ack, ptl, mx_frag); - if(hdr->hdr_common.hdr_flags & MCA_PTL_FLAGS_NBO) { - MCA_PTL_BASE_ACK_HDR_HTON(ack->frag_send.frag_base.frag_header.hdr_ack); - } - - /* start the fragment */ - mx_return = mx_isend( - mx_ptl->mx_endpoint, - ack->frag_segments, - ack->frag_segment_count, - ack->frag_send.frag_base.frag_peer->peer_addr, - MCA_PTL_HDR_TYPE_ACK, - ack, - &ack->frag_request); - if(mx_return != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_matched: mx_isend() failed with return value=%d\n", mx_return); - OPAL_THREAD_LOCK(&mca_ptl_mx_component.mx_lock); - ack_pending = true; - opal_list_append(&mca_ptl_mx_component.mx_pending_acks, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_ptl_mx_component.mx_lock); - } - } - } - - /* copy data into users buffer */ - if(mx_frag->frag_size > 0) { - struct iovec iov; - uint32_t iov_count = 1; 
- int free_after = 0; - ompi_convertor_t* convertor = &(request->req_recv.req_convertor); - - /* we do not attach a memory allocation function so the personalization of - * the convertor is not necessary. - */ - iov.iov_base = mx_frag->frag_data; - iov.iov_len = mx_frag->frag_size; - ompi_convertor_unpack(convertor, &iov, &iov_count, &bytes_delivered, &free_after ); - } - - /* update request status */ - ptl->ptl_recv_progress( ptl, request, - mx_frag->frag_size, bytes_delivered); - - /* release resources */ - if(ack_pending == false) - MCA_PTL_MX_RECV_FRAG_RETURN(mx_frag); -} - diff --git a/ompi/mca/ptl/mx/ptl_mx.h b/ompi/mca/ptl/mx/ptl_mx.h deleted file mode 100644 index cec32dbc7e..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_MX_H -#define MCA_PTL_MX_H - -#include "ompi_config.h" -#include -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/class/ompi_bitmap.h" -#include "ompi/class/ompi_free_list.h" -#include "orte/class/orte_proc_table.h" - -#define MCA_PTL_MX_STATISTICS 0 - -/** - * Myricom MX PTL component. 
- */ -struct mca_ptl_mx_component_t { - mca_ptl_base_component_1_0_0_t super; /**< base PTL component */ - int mx_free_list_num; /**< initial size of free lists */ - int mx_free_list_max; /**< maximum size of free lists */ - int mx_free_list_inc; /**< number of elements to growing free lists by */ - int mx_prepost; /**< number of preposted recvs */ - int mx_debug; /**< debug level */ - uint32_t mx_filter; /**< filter assigned to application */ - uint32_t mx_num_ptls; /**< number of MX NICs available to app */ - uint32_t mx_max_ptls; /**< max number of MX NICs to use */ - struct mca_ptl_mx_module_t** mx_ptls; /**< array of available PTL modules */ - ompi_free_list_t mx_send_frags; /**< free list of mx send fragments */ - ompi_free_list_t mx_recv_frags; /**< free list of mx recv fragments */ - opal_hash_table_t mx_procs; /**< hash table of procs */ - opal_list_t mx_pending_acks; /**< queue of pending sends */ - opal_mutex_t mx_lock; /**< lock for accessing module state */ -}; - -typedef struct mca_ptl_mx_component_t mca_ptl_mx_component_t; -struct mca_ptl_mx_recv_frag_t; -struct mca_ptl_mx_send_frag_t; - -extern mca_ptl_mx_component_t mca_ptl_mx_component; - -/** - * Register MX module parameters with the MCA framework - */ -extern int mca_ptl_mx_component_open(void); - -/** - * Any final cleanup before being unloaded. - */ -extern int mca_ptl_mx_component_close(void); - -/** - * MCA->PTL Intializes the PTL component and creates specific PTL - * module(s). - * - * @param num_ptls (OUT) Returns the number of ptl instances created, or 0 - * if the transport is not available. - * - * @param allow_multi_user_threads (OUT) Indicated wether this component can - * run at MPI_THREAD_MULTIPLE or not. - * - * @param have_hidden_threads (OUT) Whether this component uses - * hidden threads (e.g., progress threads) or not. - * - * @return Array of pointers to PTL modules, or NULL if the transport - * is not available. 
- * - * During component initialization, the PTL component should discover - * the physical devices that are available for the given transport, - * and create a PTL instance to represent each device. Any addressing - * information required by peers to reach the device should be published - * during this function via the mca_pml_base_modex_send() interface. - * - */ - -extern mca_ptl_base_module_t** mca_ptl_mx_component_init( - int *num_ptls, - bool allow_multi_user_threads, - bool have_hidden_threads ); - - -/** - * MCA->PTL Called to dynamically change a component parameter. - * - * @param flag (IN) Parameter to change. - * @param value (IN) Optional parameter value. - * - * @return OMPI_SUCCESS or error code on failure. - * - * The only supported parameter is currently MCA_PTL_ENABLE, - * which can be used by the PML to enable/disable forwarding - * by the PTL. - */ - -extern int mca_ptl_mx_component_control( - int param, - void* value, - size_t size -); - -/** - * MCA->PTL Called to progress outstanding requests for - * non-threaded polling environments. - * - * @param tstamp Current time. - * @return OMPI_SUCCESS or error code on failure. - */ - -extern int mca_ptl_mx_component_progress( - mca_ptl_tstamp_t tstamp -); - -/** - * Myricom MX PTL module. - */ -struct mca_ptl_mx_module_t { - mca_ptl_base_module_t super; /**< base PTL module interface */ - opal_list_t mx_peers; /**< list of peers */ - bool mx_enabled; /**< flag to indicate if endpoint enabled */ - mx_endpoint_t mx_endpoint; /**< endpoint */ - mx_endpoint_addr_t mx_endpoint_addr; /**< endpoint address */ - volatile int32_t mx_recvs_posted; /**< count of posted match fragments */ -#if OMPI_ENABLE_PROGRESS_THREADS - opal_thread_t mx_thread; /**< thread for progressing outstanding requests */ -#endif -}; -typedef struct mca_ptl_mx_module_t mca_ptl_mx_module_t; - - -extern mca_ptl_mx_module_t mca_ptl_mx_module; - -/** - * Create/initialize the MX PTL modules. 
- * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_ptl_mx_module_init(void); - - -/** - * Cleanup any resources held by the PTL. - * - * @param ptl PTL instance. - * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_ptl_mx_finalize( - struct mca_ptl_base_module_t* ptl -); - - -/** - * PML->PTL notification of change in the process list. - * - * @param ptl (IN) PTL instance - * @param nprocs (IN) Number of processes - * @param procs (IN) Set of processes - * @param peer (OUT) Set of (optional) mca_ptl_base_peer_t instances returned by PTL. * @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this PTL. - * @return OMPI_SUCCESS or error status on failure. - * - * The mca_ptl_base_module_add_procs_fn_t() is called by the PML to - * determine the set of PTLs that should be used to reach each process. - * Any addressing information exported by the peer via the mca_pml_base_modex_send() - * function should be available during this call via the corresponding - * mca_pml_base_modex_recv() function. The PTL may utilize this information to - * determine reachability of each peer process. - * - * For each process that is reachable by the PTL, the bit corresponding to the index - * into the proc array (nprocs) should be set in the reachable bitmask. The PML - * provides the PTL the option to return a pointer to a data structure defined - * by the PTL that is returned to the PTL on subsequent calls to the PTL data - * transfer functions (e.g ptl_send). This may be used by the PTL to cache any addressing - * or connection information (e.g. TCP socket, IP queue pair). - */ - -extern int mca_ptl_mx_add_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers, - ompi_bitmap_t* reachable -); - - -/** - * PML->PTL notification of change to the process list. 
- * - * @param ptl (IN) PTL instance - * @param nprocs (IN) Number of processes - * @param proc (IN) Set of processes - * @param peer (IN) Set of peer addressing information. - * @return Status indicating if cleanup was successful - * - * When the process list changes, the PML notifies the PTL of the - * change, to provide the opportunity to cleanup or release any - * resources associated with the peer. - */ - -extern int mca_ptl_mx_del_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers -); - - -/** - * PML->PTL Initialize a send request for use by the PTL. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - * To reduce latency (number of required allocations), the PML allocates up - * to ptl_cache_bytes of additional space contigous w/ the base send request. - * This space may be used by the PTL for additional control information (e.g. - * first fragment descriptor). - * - * The ptl_request_init() function is called by the PML when requests are - * allocated to the PTLs cache. These requests will be cached by the PML - * on completion and re-used by the same PTL w/out additional calls to - * ptl_request_init(). - * - * If the cache size is exceeded, the PML may pass requests to ptl_send/ptl_put - * that have been taken from the global pool and have not been initialized by the - * PTL. These requests will have the req_cached attribute set to false. - * - */ - -extern int mca_ptl_mx_request_init( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* -); - - -/** - * PML->PTL Cleanup any resources that may have been associated with the - * request by the PTL. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - * The ptl_request_fini function is called when the PML removes a request - * from the PTLs cache (due to resource constraints). 
This routine provides - * the PTL the chance to cleanup/release any resources cached on the send - * descriptor by the PTL. - */ - -extern void mca_ptl_mx_request_fini( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* -); - - -/** - * PML->PTL Notification from the PML to the PTL that a receive - * has been posted and matched against the indicated fragment. - * - * @param ptl (IN) PTL instance - * @param recv_frag Matched fragment - * - * The ptl_matched() function is called by the PML when a fragment - * is matched to a posted receive. This may occur during a call to - * ptl_match() if the receive is matched, or at a later point in time - * when a matching receive is posted. - * - * When this routine is called, the PTL is responsible for generating - * an acknowledgment to the peer if the MCA_PTL_FLAGS_ACK - * bit is set in the original fragment header. Additionally, the PTL - * is responsible for transferring any data associated with the fragment - * into the users buffer utilizing the datatype engine, and notifying - * the PML that the fragment has completed via the ptl_recv_progress() - * function. - */ - -extern void mca_ptl_mx_matched( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* frag -); - -/** - * PML->PTL Initiate a send to the peer. - * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param request (IN) Send request - * @param offset Current offset into packed/contiguous buffer. - * @param size (IN) Number of bytes PML is requesting PTL to deliver, - * @param flags (IN) Flags that should be passed to the peer via the message header. - * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - * - * The PML implements a rendevouz protocol, with up to the PTL threshold - * (ptl_first_frag_size) bytes of the message sent in eager send mode. 
The ptl_send() - * function is called by the PML to initiate the send of the first message fragment. - * - * The PTL is responsible for updating the current data offset (req_offset) in the - * request to reflect the actual number of bytes fragmented. This may be less than - * the requested size, due to resource constraints or datatype alighnment/offset. If - * an acknowledgment is required, the MCA_PTL_FLAGS_ACK bit will be set in the - * flags parameter. In this case, the PTL should not call ptl_send_progress() function - * to indicate completion of the fragment until the ack is received. For all other - * fragments ptl_send_progress() may be called based on local completion semantics. - */ - -extern int mca_ptl_mx_send( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t*, - size_t offset, - size_t size, - int flags -); - - -/** - * PML->PTL Continue sending fragments of a large message. - * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param request (IN) Send request - * @param offset Current offset into packed/contiguous buffer. - * @param size (IN) Number of bytes PML is requesting PTL to deliver, - * @param flags (IN) Flags that should be passed to the peer via the message header. 
- * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - * - */ - -extern int mca_ptl_mx_send_continue( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t*, - size_t offset, - size_t size, - int flags -); - - -#define HAVE_MX_ICOMPLETED 0 -#if HAVE_MX_ICOMPLETED -extern mx_return_t mx_icompleted(mx_endpoint_t endpoint, mx_status_t *status, uint32_t *result); -#endif -#endif diff --git a/ompi/mca/ptl/mx/ptl_mx_component.c b/ompi/mca/ptl/mx/ptl_mx_component.c deleted file mode 100644 index 2d297389c6..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_component.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "ompi/constants.h" -#include "opal/util/output.h" -#include "opal/threads/threads.h" -#include "opal/mca/base/mca_base_param.h" -#include "ptl_mx.h" -#include "ptl_mx_module.h" -#include "ptl_mx_peer.h" - -/* - * The MX component - */ - -mca_ptl_mx_component_t mca_ptl_mx_component = { - { - /* First, the mca_base_module_t struct containing meta - information about the module itself */ - { - /* Indicate that we are a pml v1.0.0 module (which also - implies a specific MCA version) */ - - MCA_PTL_BASE_VERSION_1_0_0, - - "mx", /* MCA module name */ - OMPI_MAJOR_VERSION, /* MCA module major version */ - OMPI_MINOR_VERSION, /* MCA module minor version */ - OMPI_RELEASE_VERSION, /* MCA module release version */ - mca_ptl_mx_component_open, /* module open */ - mca_ptl_mx_component_close /* module close */ - }, - - /* Next the MCA v1.0.0 module meta data */ - - { - /* Whether the module is checkpointable or not */ - - false - }, - - mca_ptl_mx_component_init, - mca_ptl_mx_component_control, - mca_ptl_mx_component_progress, - } -}; - - -/* - * utility routines for parameter registration - */ - -static inline char* mca_ptl_mx_param_register_string( - const char* param_name, - const char* default_value) -{ - char *param_value; - int id = mca_base_param_register_string("ptl","mx",param_name,NULL,default_value); - mca_base_param_lookup_string(id, ¶m_value); - return param_value; -} - -static inline int mca_ptl_mx_param_register_int( - const char* param_name, - int default_value) -{ - int id = mca_base_param_register_int("ptl","mx",param_name,NULL,default_value); - int param_value = default_value; - mca_base_param_lookup_int(id,¶m_value); - return param_value; -} - -/* - * Called by MCA framework to open the module, registers - * module parameters. 
- */ - -int mca_ptl_mx_component_open(void) -{ - /* initialize state */ - mca_ptl_mx_component.mx_ptls = NULL; - mca_ptl_mx_component.mx_num_ptls = 0; - - /* register MX module parameters */ - mca_ptl_mx_component.mx_filter = - (uint32_t)mca_ptl_mx_param_register_int("filter", 0xdeadbeef); - mca_ptl_mx_component.mx_prepost = - mca_ptl_mx_param_register_int("prepost", 1); - mca_ptl_mx_component.mx_debug = - mca_ptl_mx_param_register_int("debug", 0); - mca_ptl_mx_component.mx_free_list_num = - mca_ptl_mx_param_register_int("free_list_num", 256); - mca_ptl_mx_component.mx_free_list_max = - mca_ptl_mx_param_register_int("free_list_max", -1); - mca_ptl_mx_component.mx_free_list_inc = - mca_ptl_mx_param_register_int("free_list_inc", 256); - mca_ptl_mx_component.mx_max_ptls = - (uint32_t)mca_ptl_mx_param_register_int("num_nics", -1); - mca_ptl_mx_module.super.ptl_exclusivity = - mca_ptl_mx_param_register_int("exclusivity", 0); - mca_ptl_mx_module.super.ptl_first_frag_size = - mca_ptl_mx_param_register_int("first_frag_size", - (32*1024) - sizeof(mca_ptl_base_header_t)); - mca_ptl_mx_module.super.ptl_min_frag_size = - mca_ptl_mx_param_register_int("min_frag_size", 32*1024); - mca_ptl_mx_module.super.ptl_max_frag_size = - mca_ptl_mx_param_register_int("max_frag_size", -1); - return OMPI_SUCCESS; -} - -/* - * module cleanup - sanity checking of queue lengths - */ - -int mca_ptl_mx_component_close(void) -{ - if( NULL == mca_ptl_mx_component.mx_ptls ) - return OMPI_SUCCESS; - - mx_finalize(); -#if OMPI_ENABLE_DEBUG - if (mca_ptl_mx_component.mx_send_frags.fl_num_allocated && - mca_ptl_mx_component.mx_send_frags.fl_num_allocated != - mca_ptl_mx_component.mx_send_frags.super.opal_list_length) { - opal_output(0, "mx send frags: %d allocated %d returned\n", - mca_ptl_mx_component.mx_send_frags.fl_num_allocated, - mca_ptl_mx_component.mx_send_frags.super.opal_list_length); - } - /* allow for pre-posted receives */ - if (mca_ptl_mx_component.mx_recv_frags.fl_num_allocated && - 
mca_ptl_mx_component.mx_recv_frags.fl_num_allocated - 3 > - mca_ptl_mx_component.mx_recv_frags.super.opal_list_length) { - opal_output(0, "mx recv frags: %d allocated %d returned\n", - mca_ptl_mx_component.mx_recv_frags.fl_num_allocated, - mca_ptl_mx_component.mx_recv_frags.super.opal_list_length); - } -#endif - - /* release resources */ - OBJ_DESTRUCT(&mca_ptl_mx_component.mx_send_frags); - OBJ_DESTRUCT(&mca_ptl_mx_component.mx_recv_frags); - OBJ_DESTRUCT(&mca_ptl_mx_component.mx_procs); - OBJ_DESTRUCT(&mca_ptl_mx_component.mx_pending_acks); - OBJ_DESTRUCT(&mca_ptl_mx_component.mx_lock); - return OMPI_SUCCESS; -} - - -/* - * MX module initialization. - */ -mca_ptl_base_module_t** mca_ptl_mx_component_init( - int *num_ptls, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - mca_ptl_base_module_t** ptls; - *num_ptls = 0; - - /* initialize objects */ - OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_send_frags, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_recv_frags, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_procs, opal_hash_table_t); - OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_pending_acks, opal_list_t); - OBJ_CONSTRUCT(&mca_ptl_mx_component.mx_lock, opal_mutex_t); - - ompi_free_list_init( &mca_ptl_mx_component.mx_send_frags, - sizeof(mca_ptl_mx_send_frag_t), - OBJ_CLASS(mca_ptl_mx_send_frag_t), - mca_ptl_mx_component.mx_free_list_num, - mca_ptl_mx_component.mx_free_list_max, - mca_ptl_mx_component.mx_free_list_inc, - NULL ); /* use default allocator */ - - ompi_free_list_init( &mca_ptl_mx_component.mx_recv_frags, - sizeof(mca_ptl_mx_recv_frag_t), - OBJ_CLASS(mca_ptl_mx_recv_frag_t), - mca_ptl_mx_component.mx_free_list_num, - mca_ptl_mx_component.mx_free_list_max, - mca_ptl_mx_component.mx_free_list_inc, - NULL ); /* use default allocator */ - - /* intialize process hash table */ - opal_hash_table_init( &mca_ptl_mx_component.mx_procs, 256 ); - - /* initialize mx ptls */ - if(OMPI_SUCCESS != mca_ptl_mx_module_init()) - return NULL; - 
- /* allocate and return a copy of the ptl array */ - ptls = malloc( mca_ptl_mx_component.mx_num_ptls * sizeof(mca_ptl_base_module_t*) ); - if(NULL == ptls) - return NULL; - - memcpy( ptls, mca_ptl_mx_component.mx_ptls, - mca_ptl_mx_component.mx_num_ptls*sizeof(mca_ptl_mx_module_t*) ); - *num_ptls = mca_ptl_mx_component.mx_num_ptls; - return ptls; -} - -/* - * MX module control - */ - -int mca_ptl_mx_component_control(int param, void* value, size_t size) -{ - switch(param) { - case MCA_PTL_ENABLE: - if(*(int*)value) { - mca_ptl_mx_enable(); - } else - mca_ptl_mx_disable(); - break; - default: - break; - } - return OMPI_SUCCESS; -} - - -/* - * MX module progress. - */ - -int mca_ptl_mx_component_progress(mca_ptl_tstamp_t tstamp) -{ - int num_progressed = 0; - size_t i; - for(i=0; imx_recvs_posted == 0) { - OPAL_THREAD_ADD32(&ptl->mx_recvs_posted,1); - MCA_PTL_MX_POST(ptl,MCA_PTL_HDR_TYPE_MATCH,sizeof(mca_ptl_base_match_header_t)); - } - - mx_return = mx_ipeek( - ptl->mx_endpoint, - &mx_request, - &mx_result); - if(mx_return != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_component_progress: mx_ipeek() failed with status %d\n", - mx_return); - return OMPI_ERROR; - } - if(mx_result == 0) { - continue; - } - - mx_return = mx_test( - ptl->mx_endpoint, - &mx_request, - &mx_status, - &mx_result); - if(mx_return == MX_SUCCESS) { - MCA_PTL_MX_PROGRESS(ptl, mx_status); - } else { - opal_output(0, "mca_ptl_mx_progress: mx_test() failed with status=%dn", - mx_return); - } - num_progressed++; -#else - /* pre-post receive */ - if(ptl->mx_recvs_posted == 0) { - OPAL_THREAD_ADD32(&ptl->mx_recvs_posted,1); - MCA_PTL_MX_POST(ptl,MCA_PTL_HDR_TYPE_MATCH,sizeof(mca_ptl_base_match_header_t)); - } - - /* poll for completion */ - mx_return = mx_icompleted( - ptl->mx_endpoint, - &mx_status, - &mx_result); - if(mx_return != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_component_progress: mx_ipeek() failed with status %d\n", - mx_return); - return OMPI_ERROR; - } - if(mx_result > 0) { - 
MCA_PTL_MX_PROGRESS(ptl, mx_status); - } - num_progressed++; -#endif - } - return num_progressed; -} - diff --git a/ompi/mca/ptl/mx/ptl_mx_module.c b/ompi/mca/ptl/mx/ptl_mx_module.c deleted file mode 100644 index e8b7115788..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_module.c +++ /dev/null @@ -1,459 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ptl_mx.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/base/pml_base_module_exchange.h" -#include "ompi/communicator/communicator.h" -#include "opal/util/output.h" -#include "ptl_mx_peer.h" -#include "ptl_mx_proc.h" -#include "ptl_mx_module.h" - -static mca_ptl_mx_module_t* mca_ptl_mx_create(uint64_t addr); - -static void* mca_ptl_mx_mem_alloc( size_t* size, void* userdata ) -{ - return malloc(*size); -} - - -/** - * Initialize MX PTL modules - */ - -int mca_ptl_mx_module_init(void) -{ - size_t size; - uint32_t i; - int rc; - uint64_t *nic_addrs; - mca_ptl_mx_endpoint_t *endpoint_addrs; - mx_return_t status; - - /* intialize MX library */ - if(MX_SUCCESS != (status = mx_init())) { - opal_output(0, "mca_ptl_mx_init: mx_init() failed with status=%d\n", status); - return OMPI_ERROR; - } - - /* Do not abort on errors */ - mx_set_error_handler(MX_ERRORS_RETURN); - - /* determine the number of NICs */ - if((status = mx_get_info( NULL, MX_NIC_COUNT, NULL, 0, - &mca_ptl_mx_component.mx_num_ptls, sizeof(uint32_t))) != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_init: 
mx_get_info(MX_NIC_COUNT) failed with status=%d\n", status); - return OMPI_ERROR; - } - - /* determine the NIC ids */ - size = sizeof(uint64_t) * (mca_ptl_mx_component.mx_num_ptls+1); - if(NULL == (nic_addrs = (uint64_t*)malloc(size))) - return OMPI_ERR_OUT_OF_RESOURCE; - if( (status = mx_get_info( NULL, MX_NIC_IDS, NULL, 0, - nic_addrs, size)) != MX_SUCCESS) { - free(nic_addrs); - return OMPI_ERROR; - } - - /* check for limit on number of ptls */ - if(mca_ptl_mx_component.mx_num_ptls > mca_ptl_mx_component.mx_max_ptls) - mca_ptl_mx_component.mx_num_ptls = mca_ptl_mx_component.mx_max_ptls; - - /* allocate an array of pointers to ptls */ - mca_ptl_mx_component.mx_ptls = (mca_ptl_mx_module_t**)malloc( - sizeof(mca_ptl_mx_module_t*) * mca_ptl_mx_component.mx_num_ptls); - if(NULL == mca_ptl_mx_component.mx_ptls) { - opal_output(0, "mca_ptl_mx_init: malloc() failed\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* create a ptl for each NIC */ - for( i = 0; i < mca_ptl_mx_component.mx_num_ptls; i++ ) { - mca_ptl_mx_module_t* ptl = mca_ptl_mx_create(nic_addrs[i]); - if(NULL == ptl) { - return OMPI_ERROR; - } - mca_ptl_mx_component.mx_ptls[i] = ptl; - } - free(nic_addrs); - - /* post local endpoint addresses */ - size = mca_ptl_mx_component.mx_num_ptls * sizeof(mca_ptl_mx_endpoint_t); - endpoint_addrs = (mca_ptl_mx_endpoint_t*)malloc(size); - if(NULL == endpoint_addrs) { - opal_output(0, "mca_ptl_mx_module_init: malloc() failed\n"); - return OMPI_ERR_OUT_OF_RESOURCE; - } - for( i = 0; i < mca_ptl_mx_component.mx_num_ptls; i++ ) { - mca_ptl_mx_module_t* ptl = mca_ptl_mx_component.mx_ptls[i]; - mx_decompose_endpoint_addr( ptl->mx_endpoint_addr, - &(endpoint_addrs[i].nic_id), &(endpoint_addrs[i].endpoint_id) ); - } - if((rc = mca_pml_base_modex_send( &mca_ptl_mx_component.super.ptlm_version, - endpoint_addrs, size )) != OMPI_SUCCESS ) - return rc; - return OMPI_SUCCESS; -} - - -/** - * Thread to progress outstanding requests. 
- */ - -#if OMPI_ENABLE_PROGRESS_THREADS - -static void* mca_ptl_mx_thread(opal_object_t *arg) -{ - opal_thread_t* thr = (opal_thread_t*)arg; - mca_ptl_mx_module_t* ptl = thr->t_arg; - while(ptl->mx_enabled) { - mx_request_t mx_request; - mx_return_t mx_return; - mx_status_t mx_status; - uint32_t mx_result; - - /* block waiting for status */ - mx_return = mx_peek( - ptl->mx_endpoint, - UINT_MAX, - &mx_request, - &mx_result); - if(mx_return == MX_TIMEOUT) - continue; - else if(ptl->mx_enabled == false) - break; - else if(mx_return != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_thread: mx_probe() failed with status %d\n", - mx_return); - break; - } - - /* dispatch completed requests */ - mx_return = mx_test( - ptl->mx_endpoint, - &mx_request, - &mx_status, - &mx_result); - if(mx_return == MX_SUCCESS) { - MCA_PTL_MX_PROGRESS(ptl, mx_status); - } else { - opal_output(0, "mca_ptl_mx_progress: mx_test() failed with status=%dn", - mx_return); - } - - /* pre-post receive */ - if(ptl->mx_recvs_posted == 0) { - OPAL_THREAD_ADD32(&ptl->mx_recvs_posted,1); - MCA_PTL_MX_POST(ptl,MCA_PTL_HDR_TYPE_MATCH,sizeof(mca_ptl_base_match_header_t)); - } - } - return NULL; -} - -#endif - -/* - * Callback on a match. 
- * - */ - -static void mca_ptl_mx_match(void* context, uint64_t match_value, int size) -{ - mca_ptl_mx_module_t* ptl = (mca_ptl_mx_module_t*)context; - mca_ptl_base_recv_request_t* request; - mca_ptl_mx_recv_frag_t *frag; - mx_return_t mx_return; - ompi_ptr_t match; - size_t offset; - ompi_proc_t* proc; - ompi_convertor_t* convertor; - int rc; - - /* use of the header type as the match value */ - if(match_value <= MCA_PTL_HDR_TYPE_MAX) - return; - - /* otherwise extract request pointer and offset */ - match.lval = match_value; - request = (mca_ptl_base_recv_request_t*)match.pval; - offset = match.sval.lval; - proc = ompi_comm_peer_lookup(request->req_recv.req_base.req_comm, - request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE); - - /* allocate a fragment for receive */ - MCA_PTL_MX_RECV_FRAG_ALLOC(frag, rc); - if(rc != OMPI_SUCCESS) { - opal_output(0, "mca_ptl_mx_match: unable to allocate resources.\n"); - return; - } - - frag->frag_size = size; - frag->frag_recv.frag_request = request; - frag->frag_recv.frag_base.frag_peer = NULL; - frag->frag_recv.frag_base.frag_owner = &ptl->super; - frag->frag_recv.frag_base.frag_size = frag->frag_size; - frag->frag_recv.frag_base.frag_header.hdr_common.hdr_type = - MCA_PTL_HDR_TYPE_FRAG; - frag->frag_recv.frag_base.frag_header.hdr_common.hdr_flags = 0; - convertor = &frag->frag_recv.frag_base.frag_convertor; - - ompi_convertor_clone( &(request->req_recv.req_convertor), - convertor, 1 ); - ompi_convertor_personalize( convertor, 0, &offset, mca_ptl_mx_mem_alloc, NULL ); - - /* non-contiguous - allocate buffer for receive */ - if( 1 == ompi_convertor_need_buffers( convertor ) || - request->req_recv.req_bytes_packed < offset + frag->frag_size ) { - - /* TODO - use a fixed fragment size for non-contigous and convert - * this to a free-list of buffers. 
- */ - frag->frag_recv.frag_is_buffered = true; - frag->frag_recv.frag_base.frag_addr = malloc(frag->frag_size); - if( NULL == frag->frag_recv.frag_base.frag_addr ) { - opal_output(0, "mca_ptl_mx_match: unable to allocate buffer (%d)\n", frag->frag_size); - MCA_PTL_MX_RECV_FRAG_RETURN(frag); - return; - } - - /* check for sending more than receiving */ - if( offset > request->req_recv.req_bytes_packed ) { - frag->frag_recv.frag_base.frag_size = 0; - } else if (offset + frag->frag_size > request->req_recv.req_bytes_packed ) { - frag->frag_recv.frag_base.frag_size = request->req_recv.req_bytes_packed - offset; - } - /* calculate offset into users buffer */ - } else { - frag->frag_recv.frag_base.frag_addr = ((unsigned char*)request->req_recv.req_base.req_addr) + offset; - frag->frag_recv.frag_is_buffered = false; - } - - /* dont receive a header */ - frag->frag_segment_count = 1; - frag->frag_segments[1].segment_ptr = frag->frag_recv.frag_base.frag_addr; - frag->frag_segments[1].segment_length = frag->frag_size; - - mx_return = mx_irecv( - ptl->mx_endpoint, - frag->frag_segments+1, - frag->frag_segment_count, - match_value, - MX_MATCH_MASK_NONE, - frag, - &frag->frag_request); - if(mx_return != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_match: mx_irecv() failed with status=%dn", mx_return); - MCA_PTL_MX_RECV_FRAG_RETURN(frag); - } -} - - -/* - * Create and intialize an MX PTL module, where each module - * represents a specific NIC. 
- */ - -static mca_ptl_mx_module_t* mca_ptl_mx_create(uint64_t addr) -{ - mca_ptl_mx_module_t* ptl = malloc(sizeof(mca_ptl_mx_module_t)); - mx_return_t status; - uint32_t nic_id; - - if(NULL == ptl) return NULL; - - status = mx_nic_id_to_board_number( addr, &nic_id ); - if( MX_SUCCESS != status ) { - return NULL; - } - - /* copy over default settings */ - memcpy(ptl, &mca_ptl_mx_module, sizeof(mca_ptl_mx_module_t)); - OBJ_CONSTRUCT(&ptl->mx_peers, opal_list_t); - - /* open local endpoint */ - status = mx_open_endpoint( nic_id, MX_ANY_ENDPOINT, - mca_ptl_mx_component.mx_filter, - NULL, 0, &ptl->mx_endpoint); - if(status != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_init: mx_open_endpoint() failed with status=%d\n", status); - mca_ptl_mx_finalize(&ptl->super); - return NULL; - } - - /* query the endpoint address */ - if((status = mx_get_endpoint_addr( ptl->mx_endpoint, - &ptl->mx_endpoint_addr)) != MX_SUCCESS) { - opal_output(0, "mca_ptl_mx_init: mx_get_endpoint_addr() failed with status=%d\n", status); - mca_ptl_mx_finalize(&ptl->super); - return NULL; - } - - /* prepost a receive buffer */ - ptl->mx_recvs_posted = 1; - MCA_PTL_MX_POST(ptl, MCA_PTL_HDR_TYPE_MATCH, sizeof(mca_ptl_base_match_header_t)); - MCA_PTL_MX_POST(ptl, MCA_PTL_HDR_TYPE_RNDV, sizeof(mca_ptl_base_rendezvous_header_t)); - MCA_PTL_MX_POST(ptl, MCA_PTL_HDR_TYPE_ACK, sizeof(mca_ptl_base_ack_header_t)); - - /* register a callback function for matching */ - mx_register_match_callback(ptl->mx_endpoint, mca_ptl_mx_match, ptl); - return ptl; -} - -void mca_ptl_mx_enable() -{ - size_t i; - for(i=0; imx_enabled = true; -#if OMPI_ENABLE_PROGRESS_THREADS - /* create a thread to progress requests */ - OBJ_CONSTRUCT(&ptl->mx_thread, opal_thread_t); - ptl->mx_thread.t_run = mca_ptl_mx_thread; - ptl->mx_thread.t_arg = ptl; - if(opal_thread_start(&ptl->mx_thread) != OMPI_SUCCESS) { - opal_output(0, "mca_ptl_mx_create: unable to start progress thread.\n"); - return; - } -#endif - } -} - -void 
mca_ptl_mx_disable(void) -{ - size_t i; - for(i=0; imx_enabled = false; -#if OMPI_ENABLE_PROGRESS_THREADS - mx_wakeup(ptl->mx_endpoint); - opal_thread_join(&ptl->mx_thread, NULL); -#endif - } -} - -/* - * Cleanup PTL resources. - */ - -int mca_ptl_mx_finalize(struct mca_ptl_base_module_t* ptl) -{ - mca_ptl_mx_module_t* mx_ptl = (mca_ptl_mx_module_t*)ptl; - mx_ptl->mx_enabled = false; -#if OMPI_ENABLE_PROGRESS_THREADS - mx_wakeup(mx_ptl->mx_endpoint); - opal_thread_join(&mx_ptl->mx_thread, NULL); -#endif - mx_close_endpoint(mx_ptl->mx_endpoint); - mx_ptl->mx_endpoint = NULL; - free(mx_ptl); - return OMPI_SUCCESS; -} - -/** - * PML->PTL notification of addition to the process list. - * - * @param ptl (IN) - * @param nprocs (IN) Number of processes - * @param procs (IN) Set of processes - * @param peers (OUT) Set of (optional) peer addressing info. - * @param peers (IN/OUT) Set of processes that are reachable via this PTL. - * @return OMPI_SUCCESS or error status on failure. - * - */ - -int mca_ptl_mx_add_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers, - ompi_bitmap_t* reachable) -{ - ompi_proc_t* proc_self = ompi_proc_local(); - mca_ptl_mx_module_t* ptl_mx = (mca_ptl_mx_module_t*)ptl; - size_t n; - for( n = 0; n < nprocs; n++ ) { - int rc; - mca_ptl_mx_proc_t *ptl_proc; - mca_ptl_mx_peer_t* ptl_peer; - - /* Dont let mx register for self */ - if( proc_self == procs[n] ) continue; - - if((ptl_proc = mca_ptl_mx_proc_create(procs[n])) == NULL) - return OMPI_ERR_OUT_OF_RESOURCE; - - /* peer doesn't export enough addresses */ - OPAL_THREAD_LOCK(&ptl_proc->proc_lock); - if(ptl_proc->proc_peer_count == ptl_proc->proc_addr_count) { - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - continue; - } - - /* The ptl_proc datastructure is shared by all MX PTL instances that are trying - * to reach this destination. Cache the peer instance on the proc. 
- */ - ptl_peer = OBJ_NEW(mca_ptl_mx_peer_t); - if(NULL == ptl_peer) { - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - ptl_peer->peer_ptl = ptl_mx; - rc = mca_ptl_mx_proc_insert(ptl_proc, ptl_peer); - if(rc != OMPI_SUCCESS) { - OBJ_RELEASE(ptl_peer); - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - return rc; - } - /* do we need to convert to/from network byte order */ - if(procs[n]->proc_arch != proc_self->proc_arch) { - ptl_peer->peer_nbo = true; - } - - /* set peer as reachable */ - ompi_bitmap_set_bit(reachable, n); - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - peers[n] = ptl_peer; - opal_list_append(&ptl_mx->mx_peers, (opal_list_item_t*)ptl_peer); - } - return OMPI_SUCCESS; -} - - -/** - * PML->PTL notification of change in the process list. - * - * @param ptl (IN) PTL instance - * @param nproc (IN) Number of processes. - * @param procs (IN) Set of processes. - * @param peers (IN) Set of peer data structures. - * @return Status indicating if cleanup was successful - * - */ - -int mca_ptl_mx_del_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **proc, - struct mca_ptl_base_peer_t** ptl_peer) -{ - return OMPI_SUCCESS; -} - - diff --git a/ompi/mca/ptl/mx/ptl_mx_module.h b/ompi/mca/ptl/mx/ptl_mx_module.h deleted file mode 100644 index 7368f77eb2..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_module.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_MX_MODULE_H -#define MCA_PTL_MX_MODULE_H - -#include "ompi_config.h" -#include "ptl_mx.h" -#include "ptl_mx_recvfrag.h" -#include "ptl_mx_sendfrag.h" - - -/** - * Post a receive for short messages (<32 K) - */ - -#define MCA_PTL_MX_POST(ptl, match, header_size) \ -do { \ - mca_ptl_mx_recv_frag_t *frag; \ - mx_return_t mx_return; \ - int rc; \ - \ - MCA_PTL_MX_RECV_FRAG_ALLOC(frag, rc); \ - if(rc != OMPI_SUCCESS) { \ - opal_output(0, "mca_ptl_mx_match: unable to allocate resources.\n"); \ - break; \ - } \ - frag->frag_size = 0; \ - frag->frag_recv.frag_base.frag_owner = &ptl->super; \ - frag->frag_recv.frag_base.frag_peer = NULL; \ - frag->frag_recv.frag_base.frag_size = 0; \ - frag->frag_recv.frag_base.frag_addr = frag->frag_data; \ - frag->frag_recv.frag_is_buffered = true; \ - frag->frag_recv.frag_request = NULL; \ - frag->frag_segments[0].segment_length = header_size; \ - frag->frag_segments[1].segment_ptr = frag->frag_data; \ - frag->frag_segments[1].segment_length = sizeof(frag->frag_data); \ - frag->frag_segment_count = 2; \ - mx_return = mx_irecv( \ - ptl->mx_endpoint, \ - frag->frag_segments, \ - frag->frag_segment_count, \ - match, \ - MX_MATCH_MASK_NONE, \ - frag, \ - &frag->frag_request); \ - if(mx_return != MX_SUCCESS) { \ - opal_output(0, "mca_ptl_mx_match: mx_irecv() failed with status=%dn", mx_return); \ - MCA_PTL_MX_RECV_FRAG_RETURN(frag); \ - } \ -} while(0) - - -/** - * Routine to process complete request(s). 
- */ - -#define MCA_PTL_MX_PROGRESS(ptl, mx_status) \ -do { \ - mca_ptl_base_frag_t* frag; \ - frag = (mca_ptl_base_frag_t*)mx_status.context; \ - switch(frag->frag_type) { \ - case MCA_PTL_FRAGMENT_SEND: \ - { \ - mca_ptl_mx_send_frag_t* sendfrag = (mca_ptl_mx_send_frag_t*)frag; \ - MCA_PTL_MX_SEND_FRAG_PROGRESS(sendfrag); \ - break; \ - } \ - case MCA_PTL_FRAGMENT_RECV: \ - { \ - mca_ptl_mx_recv_frag_t* recvfrag = (mca_ptl_mx_recv_frag_t*)frag; \ - mca_ptl_base_header_t* hdr = \ - &recvfrag->frag_recv.frag_base.frag_header; \ - switch(hdr->hdr_common.hdr_type) { \ - case MCA_PTL_HDR_TYPE_MATCH: \ - { \ - recvfrag->frag_size = hdr->hdr_match.hdr_msg_length; \ - MCA_PTL_MX_RECV_FRAG_MATCH(recvfrag,hdr); \ - OPAL_THREAD_ADD32(&ptl->mx_recvs_posted, -1); \ - break; \ - } \ - case MCA_PTL_HDR_TYPE_RNDV: \ - { \ - recvfrag->frag_size = hdr->hdr_rndv.hdr_frag_length; \ - MCA_PTL_MX_RECV_FRAG_RNDV(recvfrag,hdr); \ - MCA_PTL_MX_POST(ptl, MCA_PTL_HDR_TYPE_RNDV, \ - sizeof(mca_ptl_base_rendezvous_header_t)); \ - break; \ - } \ - case MCA_PTL_HDR_TYPE_FRAG: \ - { \ - MCA_PTL_MX_RECV_FRAG_FRAG(recvfrag); \ - break; \ - } \ - case MCA_PTL_HDR_TYPE_ACK: \ - { \ - mca_ptl_mx_send_frag_t* sendfrag; \ - mca_ptl_base_send_request_t* sendreq; \ - sendfrag = (mca_ptl_mx_send_frag_t*) \ - hdr->hdr_ack.hdr_src_ptr.pval; \ - sendreq = sendfrag->frag_send.frag_request; \ - sendreq->req_peer_match = hdr->hdr_ack.hdr_dst_match; \ - MCA_PTL_MX_SEND_FRAG_PROGRESS(sendfrag); \ - MCA_PTL_MX_RECV_FRAG_RETURN(recvfrag); \ - MCA_PTL_MX_POST(ptl, MCA_PTL_HDR_TYPE_ACK, \ - sizeof(mca_ptl_base_ack_header_t)); \ - break; \ - } \ - } \ - break; \ - } \ - default: \ - { \ - opal_output(0, "mca_ptl_mx_progress: invalid request type: %d", \ - frag->frag_type); \ - break; \ - } \ - } \ -} while(0) - -void mca_ptl_mx_enable(void); -void mca_ptl_mx_disable(void); - -#endif - diff --git a/ompi/mca/ptl/mx/ptl_mx_peer.c b/ompi/mca/ptl/mx/ptl_mx_peer.c deleted file mode 100644 index af2fd92194..0000000000 --- 
a/ompi/mca/ptl/mx/ptl_mx_peer.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "ptl_mx.h" -#include "ptl_mx_peer.h" - -static void mca_ptl_mx_peer_construct(mca_ptl_base_peer_t* ptl_peer); -static void mca_ptl_mx_peer_destruct(mca_ptl_base_peer_t* ptl_peer); - -OBJ_CLASS_INSTANCE( - mca_ptl_mx_peer_t, - opal_list_item_t, - mca_ptl_mx_peer_construct, - mca_ptl_mx_peer_destruct); - - -/* - * Initialize state of the peer instance. - */ - -static void mca_ptl_mx_peer_construct(mca_ptl_base_peer_t* ptl_peer) -{ - ptl_peer->peer_ptl = NULL; - ptl_peer->peer_nbo = false; -} - -/* - * Cleanup any resources held by the peer. - */ - -static void mca_ptl_mx_peer_destruct(mca_ptl_base_peer_t* ptl_peer) -{ -} - diff --git a/ompi/mca/ptl/mx/ptl_mx_peer.h b/ompi/mca/ptl/mx/ptl_mx_peer.h deleted file mode 100644 index 57a370dc26..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_peer.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_MX_PEER_H -#define MCA_PTL_MX_PEER_H - -#include "ompi_config.h" -#include - -/** - * An abstraction that represents a a peer process. -*/ -struct mca_ptl_base_peer_t { - opal_list_item_t peer_item; - mx_endpoint_addr_t peer_addr; - struct mca_ptl_mx_module_t* peer_ptl; - struct mca_ptl_mx_proc_t* peer_proc; - bool peer_nbo; -}; -typedef struct mca_ptl_base_peer_t mca_ptl_base_peer_t; -typedef struct mca_ptl_base_peer_t mca_ptl_mx_peer_t; - -OBJ_CLASS_DECLARATION(mca_ptl_mx_peer_t); - - -#endif - diff --git a/ompi/mca/ptl/mx/ptl_mx_proc.c b/ompi/mca/ptl/mx/ptl_mx_proc.c deleted file mode 100644 index a356217d84..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_proc.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "opal/sys/atomic.h" -#include "orte/class/orte_proc_table.h" -#include "ompi/mca/pml/base/pml_base_module_exchange.h" -#include "opal/util/output.h" -#include "ptl_mx.h" -#include "ptl_mx_peer.h" -#include "ptl_mx_proc.h" - - -static void mca_ptl_mx_proc_construct(mca_ptl_mx_proc_t* proc); -static void mca_ptl_mx_proc_destruct(mca_ptl_mx_proc_t* proc); - -OBJ_CLASS_INSTANCE( - mca_ptl_mx_proc_t, - opal_list_item_t, - mca_ptl_mx_proc_construct, - mca_ptl_mx_proc_destruct -); - - -/** - * Initialize mx proc instance - */ - -void mca_ptl_mx_proc_construct(mca_ptl_mx_proc_t* proc) -{ - proc->proc_ompi = NULL; - proc->proc_addrs = NULL; - proc->proc_addr_count = 0; - proc->proc_peers = NULL; - proc->proc_peer_count = 0; - OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t); -} - - -/* - * Cleanup mx proc instance - */ - -void mca_ptl_mx_proc_destruct(mca_ptl_mx_proc_t* proc) -{ - /* remove from list of all proc instances */ - OPAL_THREAD_LOCK(&mca_ptl_mx_component.mx_lock); - orte_hash_table_remove_proc(&mca_ptl_mx_component.mx_procs, &proc->proc_name); - OPAL_THREAD_UNLOCK(&mca_ptl_mx_component.mx_lock); - - /* release resources */ - if(NULL != proc->proc_peers) - free(proc->proc_peers); -} - - -/* - * Create a MX process structure. There is a one-to-one correspondence - * between a ompi_proc_t and a mca_ptl_mx_proc_t instance. We cache additional - * data (specifically the list of mca_ptl_mx_peer_t instances, and published - * addresses) associated w/ a given destination on this datastructure. 
- */ - -mca_ptl_mx_proc_t* mca_ptl_mx_proc_create(ompi_proc_t* ompi_proc) -{ - int rc; - size_t size; - mca_ptl_mx_proc_t* ptl_proc; - - OPAL_THREAD_LOCK(&mca_ptl_mx_component.mx_lock); - ptl_proc = (mca_ptl_mx_proc_t*)orte_hash_table_get_proc( - &mca_ptl_mx_component.mx_procs, &ompi_proc->proc_name); - if(NULL != ptl_proc) { - OPAL_THREAD_UNLOCK(&mca_ptl_mx_component.mx_lock); - return ptl_proc; - } - - ptl_proc = OBJ_NEW(mca_ptl_mx_proc_t); - if(NULL == ptl_proc) - return NULL; - ptl_proc->proc_ompi = ompi_proc; - ptl_proc->proc_name = ompi_proc->proc_name; - - /* add to hash table of all proc instance */ - orte_hash_table_set_proc( - &mca_ptl_mx_component.mx_procs, - &ptl_proc->proc_name, - ptl_proc); - OPAL_THREAD_UNLOCK(&mca_ptl_mx_component.mx_lock); - - /* lookup mx parameters exported by this proc */ - rc = mca_pml_base_modex_recv( - &mca_ptl_mx_component.super.ptlm_version, - ompi_proc, - (void**)&ptl_proc->proc_addrs, - &size); - if(rc != OMPI_SUCCESS) { - opal_output(0, "mca_ptl_mx_proc_create: mca_pml_base_modex_recv: failed with return value=%d", rc); - OBJ_RELEASE(ptl_proc); - return NULL; - } - if(0 != (size % sizeof(mca_ptl_mx_endpoint_t))) { - opal_output(0, "mca_ptl_mx_proc_create: mca_pml_base_modex_recv: invalid size %d\n", size); - return NULL; - } - ptl_proc->proc_addr_count = size / sizeof(mca_ptl_mx_endpoint_t); - - /* allocate space for peer array - one for each exported address */ - ptl_proc->proc_peers = (mca_ptl_mx_peer_t**) - malloc(ptl_proc->proc_addr_count * sizeof(mca_ptl_base_peer_t*)); - if(NULL == ptl_proc->proc_peers) { - OBJ_RELEASE(ptl_proc); - return NULL; - } - return ptl_proc; -} - - -/* - * Look for an existing MX process instance based on the globally unique - * process identifier. 
- */ -mca_ptl_mx_proc_t* mca_ptl_mx_proc_lookup(const orte_process_name_t *name) -{ - mca_ptl_mx_proc_t* proc; - OPAL_THREAD_LOCK(&mca_ptl_mx_component.mx_lock); - proc = (mca_ptl_mx_proc_t*)orte_hash_table_get_proc( - &mca_ptl_mx_component.mx_procs, name); - OPAL_THREAD_UNLOCK(&mca_ptl_mx_component.mx_lock); - return proc; -} - - -/* - * Note that this routine must be called with the lock on the process already - * held. Insert a ptl instance into the proc array and assign it an address. - */ -int mca_ptl_mx_proc_insert(mca_ptl_mx_proc_t* ptl_proc, mca_ptl_base_peer_t* ptl_peer) -{ - mx_return_t mx_status; - mca_ptl_mx_endpoint_t* remote = &(ptl_proc->proc_addrs[ptl_proc->proc_peer_count]); - int num_retry = 0; - /* insert into peer array */ - ptl_peer->peer_proc = ptl_proc; - ptl_proc->proc_peers[ptl_proc->proc_peer_count] = ptl_peer; - - /* construct the remote endpoint addr */ - retry_connect: - mx_status = mx_connect( ptl_peer->peer_ptl->mx_endpoint, remote->nic_id, remote->endpoint_id, - mca_ptl_mx_component.mx_filter, 1, &(ptl_peer->peer_addr) ); - if( MX_SUCCESS != mx_status ) { - if( MX_TIMEOUT == mx_status ) - if( num_retry++ < 5 ) - goto retry_connect; - opal_output( 0, "mx_connect fail for peer %d remote %lx %d filter %x with error %s\n", - ptl_proc->proc_peer_count, - remote->nic_id, remote->endpoint_id, mca_ptl_mx_component.mx_filter, - mx_strerror(mx_status) ); - return OMPI_ERROR; - } - ptl_proc->proc_peer_count++; - return OMPI_SUCCESS; -} - -/* - * Remove a peer from the proc array and indicate the address is - * no longer in use. 
- */ - -int mca_ptl_mx_proc_remove(mca_ptl_mx_proc_t* ptl_proc, mca_ptl_base_peer_t* ptl_peer) -{ - size_t i; - OPAL_THREAD_LOCK(&ptl_proc->proc_lock); - for(i=0; iproc_peer_count; i++) { - if(ptl_proc->proc_peers[i] == ptl_peer) { - memmove(ptl_proc->proc_peers+i, ptl_proc->proc_peers+i+1, - (ptl_proc->proc_peer_count-i-1)*sizeof(mca_ptl_mx_peer_t*)); - ptl_proc->proc_peer_count--; - break; - } - } - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/ptl/mx/ptl_mx_proc.h b/ompi/mca/ptl/mx/ptl_mx_proc.h deleted file mode 100644 index 4e012e426b..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_proc.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_MX_PROC_H -#define MCA_PTL_MX_PROC_H - -#include "ompi_config.h" -#include -#include "orte/mca/ns/ns.h" -#include "opal/class/opal_object.h" -#include "ompi/proc/proc.h" -#include "ptl_mx.h" -#include "ptl_mx_peer.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -typedef struct mca_ptl_mx_endpoint_t { - uint64_t nic_id; - uint32_t endpoint_id; -} mca_ptl_mx_endpoint_t; - -/** - * Represents the state of a remote process and the set of addresses - * that it exports. Also cache an instance of mca_ptl_base_peer_t for each - * PTL instance that attempts to open a connection to the process. 
- */ -struct mca_ptl_mx_proc_t { - opal_list_item_t super; /**< allow proc to be placed on a list */ - ompi_proc_t *proc_ompi; /**< pointer to corresponding ompi_proc_t */ - orte_process_name_t proc_name; /**< globally unique identifier for the process */ - mca_ptl_mx_endpoint_t *proc_addrs; /**< peer endpoint address */ - size_t proc_addr_count; /**< number of addresses published by peer */ - mca_ptl_mx_peer_t **proc_peers; /**< array of peers that have been created to access this proc */ - size_t proc_peer_count; /**< number of peers */ - opal_mutex_t proc_lock; /**< lock to protect against concurrent access to proc state */ -}; -typedef struct mca_ptl_mx_proc_t mca_ptl_mx_proc_t; - - -OBJ_CLASS_DECLARATION(mca_ptl_mx_proc_t); - - -mca_ptl_mx_proc_t* mca_ptl_mx_proc_create(ompi_proc_t* ompi_proc); -mca_ptl_mx_proc_t* mca_ptl_mx_proc_lookup(const orte_process_name_t*); - -int mca_ptl_mx_proc_insert(mca_ptl_mx_proc_t* ptl_proc, mca_ptl_base_peer_t* ptl_peer); -int mca_ptl_mx_proc_remove(mca_ptl_mx_proc_t* ptl_proc, mca_ptl_base_peer_t* ptl_peer); - - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/ptl/mx/ptl_mx_recvfrag.c b/ompi/mca/ptl/mx/ptl_mx_recvfrag.c deleted file mode 100644 index 66e79559e5..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_recvfrag.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "ptl_mx.h" -#include "ptl_mx_recvfrag.h" - - -static void mca_ptl_mx_recv_frag_construct(mca_ptl_mx_recv_frag_t* frag); -static void mca_ptl_mx_recv_frag_destruct(mca_ptl_mx_recv_frag_t* frag); - - -OBJ_CLASS_INSTANCE( - mca_ptl_mx_recv_frag_t, - mca_ptl_base_recv_frag_t, - mca_ptl_mx_recv_frag_construct, - mca_ptl_mx_recv_frag_destruct); - -/* - * MX recv fragment constructor - */ - -static void mca_ptl_mx_recv_frag_construct(mca_ptl_mx_recv_frag_t* frag) -{ - /* one time initialization */ - frag->frag_segments[0].segment_ptr = &frag->frag_recv.frag_base.frag_header; - frag->frag_segments[0].segment_length = sizeof(frag->frag_recv.frag_base.frag_header); -} - - -/* - * MX recv fragment destructor - */ - -static void mca_ptl_mx_recv_frag_destruct(mca_ptl_mx_recv_frag_t* frag) -{ -} - diff --git a/ompi/mca/ptl/mx/ptl_mx_recvfrag.h b/ompi/mca/ptl/mx/ptl_mx_recvfrag.h deleted file mode 100644 index f501ed3dc7..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_recvfrag.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PTL_MX_RECV_FRAG_H -#define MCA_PTL_MX_RECV_FRAG_H - -#include "ptl_mx.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ptl_mx_sendfrag.h" - -/** - * MX received fragment derived type. 
- */ -struct mca_ptl_mx_recv_frag_t { - mca_ptl_base_recv_frag_t frag_recv; /**< base receive fragment descriptor */ - mx_request_t frag_request; - mx_segment_t frag_segments[2]; - uint32_t frag_segment_count; - unsigned char frag_data[32*1024]; - size_t frag_size; -}; -typedef struct mca_ptl_mx_recv_frag_t mca_ptl_mx_recv_frag_t; - -OBJ_CLASS_DECLARATION(mca_ptl_mx_recv_frag_t); - - -#define MCA_PTL_MX_RECV_FRAG_ALLOC(frag, rc) \ - { \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_GET(&mca_ptl_mx_component.mx_recv_frags, item, rc); \ - frag = (mca_ptl_mx_recv_frag_t*)item; \ - } - -#define MCA_PTL_MX_RECV_FRAG_RETURN(frag) \ -{ \ - if(frag->frag_recv.frag_is_buffered && \ - frag->frag_data != frag->frag_recv.frag_base.frag_addr) { \ - free(frag->frag_recv.frag_base.frag_addr); \ - } \ - OMPI_FREE_LIST_RETURN(&mca_ptl_mx_component.mx_recv_frags, (opal_list_item_t*)frag); \ -} - - -/** - * Callback on receipt of a match fragment. - */ - -#define MCA_PTL_MX_RECV_FRAG_MATCH(frag, hdr) \ -do { \ - if(hdr->hdr_common.hdr_flags & MCA_PTL_FLAGS_NBO) { \ - MCA_PTL_BASE_MATCH_HDR_NTOH(hdr->hdr_match); \ - } \ - ptl->super.ptl_match(&ptl->super, &frag->frag_recv, &hdr->hdr_match); \ -} while(0) - - -/** - * Callback on receipt of a rendezvous fragment. - */ - -#define MCA_PTL_MX_RECV_FRAG_RNDV(frag, hdr) \ -do { \ - if(hdr->hdr_common.hdr_flags & MCA_PTL_FLAGS_NBO) { \ - MCA_PTL_BASE_RNDV_HDR_NTOH(hdr->hdr_rndv); \ - } \ - ptl->super.ptl_match(&ptl->super, &frag->frag_recv, &hdr->hdr_match); \ -} while(0) - - -/** - * Process a fragment that completed. 
- */ - -#define MCA_PTL_MX_RECV_FRAG_FRAG(frag) \ -do { \ - /* copy into user space */ \ - if(frag->frag_recv.frag_is_buffered) { \ - struct iovec iov; \ - uint32_t iov_count; \ - size_t max_data; \ - int32_t free_after; \ - \ - iov.iov_base = frag->frag_recv.frag_base.frag_addr; \ - iov.iov_len = frag->frag_recv.frag_base.frag_size; \ - iov_count = 1; \ - max_data = iov.iov_len; \ - ompi_convertor_unpack( &frag->frag_recv.frag_base.frag_convertor, \ - &iov, &iov_count, &max_data, &free_after ); \ - frag->frag_recv.frag_base.frag_size = max_data; \ - } \ - \ - /* progress the request */ \ - frag->frag_recv.frag_base.frag_owner->ptl_recv_progress( \ - frag->frag_recv.frag_base.frag_owner, \ - frag->frag_recv.frag_request, \ - frag->frag_size, \ - frag->frag_recv.frag_base.frag_size); \ - \ - MCA_PTL_MX_RECV_FRAG_RETURN(frag); \ -} while(0) - - -#endif - diff --git a/ompi/mca/ptl/mx/ptl_mx_sendfrag.c b/ompi/mca/ptl/mx/ptl_mx_sendfrag.c deleted file mode 100644 index cb11a05179..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_sendfrag.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ptl_mx.h" -#include "ptl_mx_peer.h" -#include "ptl_mx_sendfrag.h" - - -static void mca_ptl_mx_send_frag_construct(mca_ptl_mx_send_frag_t* frag); -static void mca_ptl_mx_send_frag_destruct(mca_ptl_mx_send_frag_t* frag); - - -OBJ_CLASS_INSTANCE( - mca_ptl_mx_send_frag_t, - mca_ptl_base_send_frag_t, - mca_ptl_mx_send_frag_construct, - mca_ptl_mx_send_frag_destruct); - - -/* - * Placeholders for send fragment constructor/destructors. - */ - -static void mca_ptl_mx_send_frag_construct(mca_ptl_mx_send_frag_t* frag) -{ - /* one time initialization */ - frag->frag_segments[0].segment_ptr = &frag->frag_send.frag_base.frag_header; - frag->frag_segments[0].segment_length = sizeof(mca_ptl_base_header_t); -} - - -static void mca_ptl_mx_send_frag_destruct(mca_ptl_mx_send_frag_t* frag) -{ -} - - - diff --git a/ompi/mca/ptl/mx/ptl_mx_sendfrag.h b/ompi/mca/ptl/mx/ptl_mx_sendfrag.h deleted file mode 100644 index 3b69baa196..0000000000 --- a/ompi/mca/ptl/mx/ptl_mx_sendfrag.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_MX_SEND_FRAG_H -#define MCA_PTL_MX_SEND_FRAG_H - -#include "ompi_config.h" -#include "opal/sys/atomic.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ptl_mx.h" -#include "ptl_mx_peer.h" - - -/** - * MX send fragment derived type. - */ -struct mca_ptl_mx_send_frag_t { - mca_ptl_base_send_frag_t frag_send; /**< base send fragment descriptor */ - int frag_free; - mx_request_t frag_request; - mx_segment_t frag_segments[2]; - size_t frag_segment_count; - int32_t frag_progress; -}; -typedef struct mca_ptl_mx_send_frag_t mca_ptl_mx_send_frag_t; - -OBJ_CLASS_DECLARATION(mca_ptl_mx_send_frag_t); - - -#define MCA_PTL_MX_SEND_FRAG_ALLOC(sendfrag, rc) \ - { \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_GET(&mca_ptl_mx_component.mx_send_frags, item, rc); \ - sendfrag = (mca_ptl_mx_send_frag_t*)item; \ - } - -#define MCA_PTL_MX_SEND_FRAG_RETURN(sendfrag) \ - { \ - int seg_free = sendfrag->frag_free; \ - mx_segment_t *seg_ptr = sendfrag->frag_segments+1; \ - while(seg_free) { \ - if(seg_free & 1) { \ - free(seg_ptr->segment_ptr); \ - } \ - seg_free >>= 1; \ - seg_ptr++; \ - } \ - OMPI_FREE_LIST_RETURN(&mca_ptl_mx_component.mx_send_frags, (opal_list_item_t*)sendfrag); \ - } - -#define MCA_PTL_MX_SEND_FRAG_INIT_ACK(ack,ptl,frag) \ -{ \ - mca_ptl_base_header_t* hdr = &((ack)->frag_send.frag_base.frag_header); \ - mca_pml_base_recv_request_t* request = &((frag)->frag_recv.frag_request->req_recv); \ - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK; \ - hdr->hdr_common.hdr_flags = 0; \ - hdr->hdr_ack.hdr_src_ptr = (frag)->frag_recv.frag_base.frag_header.hdr_rndv.hdr_src_ptr; \ - hdr->hdr_ack.hdr_dst_match.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ \ - hdr->hdr_ack.hdr_dst_match.pval = request; \ - hdr->hdr_ack.hdr_dst_addr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ \ - 
hdr->hdr_ack.hdr_dst_addr.pval = request->req_base.req_addr; \ - hdr->hdr_ack.hdr_dst_size = request->req_bytes_packed; \ - (ack)->frag_send.frag_request = NULL; \ - (ack)->frag_send.frag_base.frag_peer = (frag)->frag_recv.frag_base.frag_peer; \ - (ack)->frag_send.frag_base.frag_owner = ptl; \ - (ack)->frag_send.frag_base.frag_addr = NULL; \ - (ack)->frag_send.frag_base.frag_size = 0; \ - (ack)->frag_segments[0].segment_length = sizeof(mca_ptl_base_ack_header_t); \ - (ack)->frag_segment_count = 1; \ - (ack)->frag_free = 0; \ -} - -#define MCA_PTL_MX_SEND_FRAG_PROGRESS(frag) \ -do { \ - mca_ptl_base_send_request_t* request = frag->frag_send.frag_request; \ - bool frag_ack; \ - \ - /* if this is an ack - simply return to pool */ \ - if(request == NULL) { \ - MCA_PTL_MX_SEND_FRAG_RETURN(frag); \ - break; \ - } \ - \ - /* Done when: \ - * (1) ack is not required and send completes \ - * (2) ack is received and send has completed \ - */ \ - frag_ack = (frag->frag_send.frag_base.frag_header. \ - hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK) ? 
true : false; \ - if(frag_ack == false || opal_atomic_add_32(&frag->frag_progress,1) == 2) { \ - \ - /* update request status */ \ - frag->frag_send.frag_base.frag_owner->ptl_send_progress( \ - frag->frag_send.frag_base.frag_owner, \ - request, \ - frag->frag_send.frag_base.frag_size); \ - \ - /* return any fragment that didnt come from the cache */ \ - if (request->req_cached == false || \ - frag->frag_send.frag_base.frag_header.hdr_common.hdr_type == MCA_PTL_HDR_TYPE_FRAG) { \ - MCA_PTL_MX_SEND_FRAG_RETURN(frag); \ - } \ - } \ -} while (0) - -#endif - diff --git a/ompi/mca/ptl/portals/.ompi_ignore b/ompi/mca/ptl/portals/.ompi_ignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ompi/mca/ptl/portals/Makefile.am b/ompi/mca/ptl/portals/Makefile.am deleted file mode 100644 index affaa3c02a..0000000000 --- a/ompi/mca/ptl/portals/Makefile.am +++ /dev/null @@ -1,64 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if OMPI_BUILD_ptl_portals_DSO -component_noinst = -component_install = mca_ptl_portals.la -else -component_noinst = libmca_ptl_portals.la -component_install = -endif - -EXTRA_DIST = \ - src/ptl_portals_compat_utcp.c \ - src/ptl_portals_compat_redstorm.c - -portals_SOURCES = \ - src/ptl_portals.h \ - src/ptl_portals_compat.h \ - src/ptl_portals_send.h \ - src/ptl_portals_recv.h \ - src/ptl_portals.c \ - src/ptl_portals_component.c \ - src/ptl_portals_stubs.c \ - src/ptl_portals_compat_utcp.c \ - src/ptl_portals_send.c \ - src/ptl_portals_recv.c - - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) -mca_ptl_portals_la_SOURCES = $(portals_SOURCES) -mca_ptl_portals_la_LIBADD = \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la -mca_ptl_portals_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_ptl_portals_la_SOURCES = $(portals_SOURCES) -libmca_ptl_portals_la_LIBADD = -libmca_ptl_portals_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/ptl/portals/configure.params b/ompi/mca/ptl/portals/configure.params deleted file mode 100644 index f58e6ac123..0000000000 --- a/ompi/mca/ptl/portals/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=src/ptl_portals.h -PARAM_CONFIG_HEADER_FILE="portals_config.h" -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/ptl/portals/configure.stub b/ompi/mca/ptl/portals/configure.stub deleted file mode 100644 index f099d87520..0000000000 --- a/ompi/mca/ptl/portals/configure.stub +++ /dev/null @@ -1,217 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# -# quicky function to set #defines based on argument values -# -# ARGUMENTS: -# 1 configure name (first argument to ARG_WITH, minus the ptl-portals-) -# 2 define name -# 3 default value -# 4 description (used for both ARG_WITH and DEFINE) -AC_DEFUN([MCA_PTL_PORTALS_CONFIG_VAL], -[ - AC_ARG_WITH([ptl-portals-$1], AC_HELP_STRING([--with-ptl-portals-$1], - [$4 (default: $3)])) - AC_MSG_CHECKING([for $1 value]) - case "[$with_]m4_bpatsubst([ptl-portals-$1], -, _)" in - "") - $2=$3 - AC_MSG_RESULT([[$]$2 (default)]) - ;; - "no") - AC_MSG_RESULT([error]) - AC_MSG_ERROR([--without-ptl-portals-$1 is invalid argument]) - ;; - *) - $2="$with_m4_bpatsubst([ptl-portals-$1], -, _)" - AC_MSG_RESULT([[$]$2]) - ;; - esac - AC_DEFINE_UNQUOTED([$2], [[$]$2], [$4]) -])dnl - - -# -# Main function. This will be invoked in the middle of the templated -# configure script. 
-# -AC_DEFUN([MCA_CONFIGURE_STUB],[ - - # Additional --with flags that can be specified - - AC_ARG_WITH(ptl-portals, - AC_HELP_STRING([--with-ptl-portals=DIR], - [Specify the installation directory of PORTALS])) - - # Add to CPPFLAGS if necessary - EXTRA_CPPFLAGS= - if test -n "$with_ptl_portals"; then - if test -d "$with_ptl_portals/include"; then - EXTRA_CPPFLAGS="-I$with_ptl_portals/include" - else - AC_MSG_WARN([*** Warning: cannot find $with_ptl_portals/include]) - AC_MSG_WARN([*** Will still try to configure portals ptl anyway...]) - fi - fi - - # See if we can find portals.h - CPPFLAGS="$CPPFLAGS $EXTRA_CPPFLAGS" - AC_CHECK_HEADERS(portals3.h,, - AC_MSG_ERROR([*** Cannot find working portals3.h.])) - - # Add to LDFLAGS if necessary - EXTRA_LDFLAGS= - if test -n "$with_ptl_portals"; then - if test -d "$with_ptl_portals/lib"; then - EXTRA_LDFLAGS="-L$with_ptl_portals/lib" - else - AC_MSG_WARN([*** Warning: cannot find $with_ptl_portals/lib]) - AC_MSG_WARN([*** Will still try to configure portals ptl anyway...]) - fi - fi - - - # - # Configure Portals for our local environment - # - PTL_PORTALS_UTCP=0 - PTL_PORTALS_REDSTORM=0 - PTL_PORTALS_COMPAT="" - PTL_PORTALS_HAVE_EVENT_UNLINK=0 - - AC_ARG_WITH([ptl-portals-config], - AC_HELP_STRING([--with-ptl-portals-config], - [configuration to use for Portals support. - One of "utcp", "redstorm". (default: utcp)])) - AC_MSG_CHECKING([for Portals configuration]) - if test "$with_ptl_portals_config" = "" ; then - with_ptl_portals_config="utcp" - fi - case "$with_ptl_portals_config" in - "utcp") - PTL_PORTALS_UTCP=1 - PORTALS_LIBS="-lutcpapi -lutcplib -lp3api -lp3lib -lp3rt" - PTL_PORTALS_HAVE_EVENT_UNLINK=1 - AC_MSG_RESULT([utcp]) - ;; - "redstorm") - PTL_PORTALS_REDSTORM=1 - PORTALS_LIBS="-lp3api -lp3lib -lp3rt" - PTL_PORTALS_HAVE_EVENT_UNLINK=0 - AC_MSG_RESULT([red storm]) - ;; - *) - AC_MSG_ERROR([unknown Portals configuration. 
Can not continue]) - ;; - esac - - # Try to find all the portals libraries (this is not fun!) - AC_ARG_WITH(ptl-portals-libs, - AC_HELP_STRING([--with-ptl-portals-libs=LIBS], - [Libraries to link with for portals])) - if test -n "$with_ptl_portals_libs" ; then - PORTALS_LIBS="" - for lib in $with_ptl_portals_libs ; do - PORTALS_LIBS="$PORTALS_LIBS -l$lib" - done - fi - - AC_MSG_CHECKING([if possible to link Portals application]) - LIBS="$LIBS $PORTALS_LIBS" - LDFLAGS="$LDFLAGS $EXTRA_LDFLAGS" - AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ], [int i; PtlInit(&i);])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - AC_MSG_ERROR([Can not link with Portals libraries])]) - - AC_DEFINE_UNQUOTED([PTL_PORTALS_HAVE_EVENT_UNLINK], - [$PTL_PORTALS_HAVE_EVENT_UNLINK], - [Does Portals send a PTL_EVENT_UNLINK event]) - - AC_DEFINE_UNQUOTED([PTL_PORTALS_UTCP], [$PTL_PORTALS_UTCP], - [Use the UTCP reference implementation or Portals]) - AM_CONDITIONAL([PTL_PORTALS_UTCP], [test "$PTL_PORTALS_UTCP" = "1"]) - - AC_DEFINE_UNQUOTED([PTL_PORTALS_REDSTORM], [$PTL_PORTALS_REDSTORM], - [Use the Red Storm implementation or Portals]) - AM_CONDITIONAL([PTL_PORTALS_REDSTORM], [test "$PTL_PORTALS_REDSTORM" = "1"]) - - MCA_PTL_PORTALS_CONFIG_VAL([frag-table-id], - [PTL_PORTALS_FRAG_TABLE_ID], [1], - [Portals table id to use for fragment receive queue]) - - MCA_PTL_PORTALS_CONFIG_VAL([retrans-table-id], - [PTL_PORTALS_RETRANS_TABLE_ID], [2], - [Portals table id to use for retransmit request queue]) - - MCA_PTL_PORTALS_CONFIG_VAL([debug-level], - [PTL_PORTALS_DEFAULT_DEBUG_LEVEL], [100], - [debugging level for portals ptl]) - - MCA_PTL_PORTALS_CONFIG_VAL([request-cache-size], - [PTL_PORTALS_DEFAULT_REQUEST_CACHE_SIZE], [1], - [request cache size for portals ptl]) - - MCA_PTL_PORTALS_CONFIG_VAL([first-frag-size], - [PTL_PORTALS_DEFAULT_FIRST_FRAG_SIZE], [16384], - [first frag size for portals ptl]) - - MCA_PTL_PORTALS_CONFIG_VAL([first-frag-num-entries], - 
[PTL_PORTALS_DEFAULT_FIRST_FRAG_NUM_ENTRIES], [3], - [number of memory descriptors for first fragments]) - - MCA_PTL_PORTALS_CONFIG_VAL([first-frag-entry-size], - [PTL_PORTALS_DEFAULT_FIRST_FRAG_ENTRY_SIZE], [2098152], - [size of memory associeted with first fag md]) - - MCA_PTL_PORTALS_CONFIG_VAL([recv-queue-size], - [PTL_PORTALS_DEFAULT_RECV_QUEUE_SIZE], [512], - [size of event queue for receiving frags]) - - MCA_PTL_PORTALS_CONFIG_VAL([send-queue-size], - [PTL_PORTALS_DEFAULT_SEND_QUEUE_SIZE], [128], - [Max number of send fragmenst pending]) - - MCA_PTL_PORTALS_CONFIG_VAL([rndv-frag-min-size], - [PTL_PORTALS_DEFAULT_RNDV_FRAG_MIN_SIZE], [0], - [minimum size of rndv fragments]) - - MCA_PTL_PORTALS_CONFIG_VAL([rndv-frag-max-size], - [PTL_PORTALS_DEFAULT_RNDV_FRAG_MAX_SIZE], [16384], - [maximum size of rndv fragments]) - - MCA_PTL_PORTALS_CONFIG_VAL([free-list-init-num], - [PTL_PORTALS_DEFAULT_FREE_LIST_INIT_NUM], [256], - [starting size of free lists]) - MCA_PTL_PORTALS_CONFIG_VAL([free-list-max-num], - [PTL_PORTALS_DEFAULT_FREE_LIST_MAX_NUM], [-1], - [maximum size of free lists]) - MCA_PTL_PORTALS_CONFIG_VAL([free-list-inc-num], - [PTL_PORTALS_DEFAULT_FREE_LIST_inc_NUM], [256], - [grow size for freelists]) - - # - # Save extra compiler/linker flags so that they can be added in - # the wrapper compilers, if necessary - # - - WRAPPER_EXTRA_LDFLAGS="$EXTRA_LDFLAGS" - WRAPPER_EXTRA_LIBS="$PORTALS_LIBS" -])dnl diff --git a/ompi/mca/ptl/portals/src/ptl_portals.c b/ompi/mca/ptl/portals/src/ptl_portals.c deleted file mode 100644 index 53f4eeb601..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals.c +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "portals_config.h" - -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "opal/util/output.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" - -#include "ptl_portals.h" -#include "ptl_portals_compat.h" -#include "ptl_portals_send.h" -#include "ptl_portals_recv.h" - -mca_ptl_portals_module_t mca_ptl_portals_module = { - { - &mca_ptl_portals_component.super, - 0, /* max size of request cache */ - sizeof(mca_ptl_portals_send_frag_t), /* byes required by ptl for a request */ - 0, /* max size of first frag */ - 0, /* min size of frag */ - 0, /* max size of frag */ - 60, /* exclusivity - higher than sm, lower than self */ - 0, /* latency */ - 0, /* bandwidth */ - MCA_PTL_PUT, /* ptl flags */ - - mca_ptl_portals_add_procs, - mca_ptl_portals_del_procs, - mca_ptl_portals_finalize, - mca_ptl_portals_send, - mca_ptl_portals_send, - NULL, - mca_ptl_portals_matched, - mca_ptl_portals_request_init, - mca_ptl_portals_request_fini, - - NULL, - NULL, - NULL, - - NULL, /* PTL stack */ - NULL /* PML use */ - }, -}; - - - -int -mca_ptl_portals_add_procs(struct mca_ptl_base_module_t* ptl, - size_t nprocs, struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers, - ompi_bitmap_t* reachable) -{ - int ret; - struct ompi_proc_t *local_proc = ompi_proc_local(); - struct ompi_proc_t *curr_proc; - ptl_process_id_t *portals_procs; - size_t i; - unsigned long distance; - struct mca_ptl_portals_module_t *myptl = - (struct mca_ptl_portals_module_t*) ptl; - - /* make sure our environment is fully initialized. 
At end of this - call, we have a working network handle on our module and - portals_procs will have the portals process identifier for each - proc (ordered, in theory) */ - ret = mca_ptl_portals_add_procs_compat(myptl, nprocs, procs, - &portals_procs); - if (OMPI_SUCCESS != ret) return ret; - - /* loop through all procs, setting our reachable flag */ - for (i= 0; i < nprocs ; ++i) { - curr_proc = procs[i]; - /* BWB - do we want to send to self? No for now */ - if (curr_proc == local_proc) continue; - - /* make sure we can reach the process */ - ret = PtlNIDist(myptl->ni_handle, - portals_procs[i], - &distance); - if (ret != PTL_OK) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "Could not find distance to process %d", i); - continue; - } - - /* set the peer as a pointer to the address */ - peers[i] = (struct mca_ptl_base_peer_t*) &(portals_procs[i]); - - /* and here we can reach */ - ompi_bitmap_set_bit(reachable, i); - } - - return OMPI_SUCCESS; -} - - -int -mca_ptl_portals_del_procs(struct mca_ptl_base_module_t *ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers) -{ - /* yeah, I have no idea what to do here */ - - return OMPI_SUCCESS; -} - - - -int -mca_ptl_portals_module_enable(struct mca_ptl_portals_module_t *ptl, - int enable) -{ - int i, ret; - - if (enable == 0) { - /* disable the unexpected receive queue */ - /* BWB - not really sure how - would have to track a lot more data... 
*/ - } else { - /* only do all the hard stuff if we haven't created the queue */ - if (ptl->eq_handles[MCA_PTL_PORTALS_EQ_SIZE - 1] != PTL_EQ_NONE) { - return OMPI_SUCCESS; - } - - /* create an event queue, then the match entries for the match - entries */ - for (i = 0 ; i < MCA_PTL_PORTALS_EQ_SIZE ; ++i) { - ret = PtlEQAlloc(ptl->ni_handle, - ptl->eq_sizes[i], - PTL_EQ_HANDLER_NONE, - &(ptl->eq_handles[i])); - if (ret != PTL_OK) { - opal_output(mca_ptl_portals_component.portals_output, - "Failed to allocate event queue: %d", ret); - return OMPI_ERROR; - } - } - - for (i = 0 ; i < ptl->first_frag_num_entries ; ++i) { - ret = ptl_portals_post_recv_md(ptl, NULL); - if (OMPI_SUCCESS != ret) return ret; - } - } - - return OMPI_SUCCESS; -} - - -int -mca_ptl_portals_finalize(struct mca_ptl_base_module_t *ptl_base) -{ - struct mca_ptl_portals_module_t *ptl = - (struct mca_ptl_portals_module_t *) ptl_base; - int ret; - - ret = PtlNIFini(ptl->ni_handle); - if (PTL_OK != ret) { - opal_output_verbose(20, mca_ptl_portals_component.portals_output, - "PtlNIFini returned %d", ret); - return OMPI_ERROR; - } - opal_output_verbose(20, mca_ptl_portals_component.portals_output, - "successfully finalized module"); - - return OMPI_SUCCESS; -} - - -int -mca_ptl_portals_request_init(struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_send_request_t *req) -{ - OBJ_CONSTRUCT(req + 1, mca_ptl_portals_send_frag_t); - return OMPI_SUCCESS; -} - - -void -mca_ptl_portals_request_fini(struct mca_ptl_base_module_t *ptl, - struct mca_ptl_base_send_request_t *req) -{ - OBJ_DESTRUCT(req + 1); -} - diff --git a/ompi/mca/ptl/portals/src/ptl_portals.h b/ompi/mca/ptl/portals/src/ptl_portals.h deleted file mode 100644 index 7ea0bf2ac6..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/* - * @file - */ -#ifndef MCA_PTL_PORTALS_H -#define MCA_PTL_PORTALS_H - -#include - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/class/ompi_bitmap.h" -#include "ompi/class/ompi_free_list.h" -#include "orte/class/orte_proc_table.h" - - -/* - * Portals PTL component. - */ -struct mca_ptl_portals_component_t { - /* base PTL component */ - mca_ptl_base_component_1_0_0_t super; - - /* output channel for debugging. Value settings when using - * output_verbose: - * - * - 0 : critical user information - * - 10: general execution diagnostic information - * - 20: initialization / shutdown diagnostic information - * - 30: basic debugging information - * - 90: useful only to developers - * - 100: lots and lots of performance impacting output - */ - int portals_output; - -#if PTL_PORTALS_UTCP - /* ethernet interface to use - only has meaning with utcp - reference */ - char *portals_ifname; -#endif - - /* Number of currently active portals modules. 
We assume these - never change between init and finalize, so these aren't thread - locked */ - uint32_t portals_num_modules; - /* List of currently available modules */ - struct mca_ptl_portals_module_t **portals_modules; - - /* initial size of free lists */ - int portals_free_list_init_num; - /* max size of free lists */ - int portals_free_list_max_num; - /* numer of elements to grow free lists */ - int portals_free_list_inc_num; - - /* free list of portals send fragments */ - ompi_free_list_t portals_send_frags; - /* free list of portals recv fragments */ - ompi_free_list_t portals_recv_frags; - - /* queue of pending sends */ - opal_list_t portals_pending_acks; - - /* lock for accessing component */ - opal_mutex_t portals_lock; -}; -typedef struct mca_ptl_portals_component_t mca_ptl_portals_component_t; - - -#define MCA_PTL_PORTALS_EQ_RECV 0 -#define MCA_PTL_PORTALS_EQ_SEND 1 -#define MCA_PTL_PORTALS_EQ_SIZE 2 - -struct mca_ptl_portals_module_t { - /* base PTL module interface */ - mca_ptl_base_module_t super; - - /* number of mds for first frags */ - int first_frag_num_entries; - /* size of each md for first frags */ - int first_frag_entry_size; - - /* size for event queue */ - int eq_sizes[MCA_PTL_PORTALS_EQ_SIZE]; - /* frag receive event queue */ - ptl_handle_eq_t eq_handles[MCA_PTL_PORTALS_EQ_SIZE]; - - /* our portals network interface */ - ptl_handle_ni_t ni_handle; - /* the limits returned from PtlNIInit for interface */ - ptl_ni_limits_t limits; - - /* number of dropped messages */ - ptl_sr_value_t dropped; -}; -typedef struct mca_ptl_portals_module_t mca_ptl_portals_module_t; - -struct mca_ptl_portals_recv_frag_t; -struct mca_ptl_portals_send_frag_t; - -/* - * Component functions (ptl_portals_component.c) - */ -int mca_ptl_portals_component_open(void); -int mca_ptl_portals_component_close(void); - - -mca_ptl_base_module_t** mca_ptl_portals_component_init(int *num_ptls, - bool has_progress_threads, - bool has_mpi_threads); - -int 
mca_ptl_portals_component_control(int param, - void* value, - size_t size); - -int mca_ptl_portals_component_progress(mca_ptl_tstamp_t tstamp); - -/* - * Compatibility functions (ptl_portals_compat_{}.c) - * - * Need to be implemented for every version of Portals - */ -int mca_ptl_portals_init(mca_ptl_portals_component_t *comp); - -int mca_ptl_portals_add_procs_compat(mca_ptl_portals_module_t* ptl, - size_t nprocs, struct ompi_proc_t **procs, - ptl_process_id_t **portals_procs); - -/* - * Module configuration functions (ptl_portals.c) - */ -int mca_ptl_portals_finalize(struct mca_ptl_base_module_t* ptl); - -int mca_ptl_portals_add_procs(struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers, - ompi_bitmap_t* reachable); - -int mca_ptl_portals_del_procs(struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers); - -int mca_ptl_portals_module_enable(struct mca_ptl_portals_module_t *ptl, - int value); - -int mca_ptl_portals_request_init(struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* req); - -void mca_ptl_portals_request_fini(struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* req); - - -/* - * Communication functions (ptl_portals_{send,recv}.c) - */ -void mca_ptl_portals_matched(struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* frag); - -int mca_ptl_portals_send(struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t*, - size_t offset, - size_t size, - int flags); - -int mca_ptl_portals_process_send_event(ptl_event_t *ev); - - -/* - * global structures - */ -extern mca_ptl_portals_component_t mca_ptl_portals_component; -/* don't use, except as base for creating module instances */ -extern mca_ptl_portals_module_t mca_ptl_portals_module; - -#endif diff --git a/ompi/mca/ptl/portals/src/ptl_portals_compat.h 
b/ompi/mca/ptl/portals/src/ptl_portals_compat.h deleted file mode 100644 index 89eea1e44c..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals_compat.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef PTL_PORTALS_COMPAT_H -#define PTL_PORTALS_COMPAT_H - -#if PTL_PORTALS_UTCP - -#include -#include -#include - -#elif PTL_PORTALS_REDSTORM - -#error "Red Storm Compatibility not implemented" - -#else - -#error "Unknown Portals library configuration" - -#endif - -#endif /* PTL_PORTALS_NAL_H */ diff --git a/ompi/mca/ptl/portals/src/ptl_portals_compat_redstorm.c b/ompi/mca/ptl/portals/src/ptl_portals_compat_redstorm.c deleted file mode 100644 index b42e128e2b..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals_compat_redstorm.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "portals_config.h" - -#include "ompi/constants.h" -#include "opal/util/output.h" - -#include "ptl_portals.h" -#include "ptl_portals_compat.h" - - -int -mca_ptl_portals_init(mca_ptl_portals_component_t *comp) -{ - int ret, max_interfaces; - struct mca_ptl_portals_module_t *ptl; - - /* - * Initialize Portals interface - */ - ret = PtlInit(&max_interfaces); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "PtlInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - - /* - * create module - only ever one "NIC" on red storm - */ - comp->portals_num_modules = 1; - comp->portals_modules = calloc(comp->portals_num_modules, - sizeof(mca_ptl_portals_module_t *)); - if (NULL == comp->portals_modules) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "malloc failed in mca_ptl_portals_init"); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - comp->portals_modules[0] = malloc(sizeof(mca_ptl_portals_module_t)); - if (NULL == comp->portals_modules) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "malloc failed in mca_ptl_portals_init"); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - ptl = comp->portals_modules[0]; - - *ptl = = mca_ptl_portals_module; - - /* - * Initialize a network device - */ - ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */ - PTL_PID_ANY, /* let library assign our pid */ - NULL, /* no desired limits */ - &(ptl->limits), /* save our limits somewhere */ - &(ptl->ni_handle) /* our interface handle */ - ); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "PtlNIInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - - - return OMPI_SUCCESS; -} - - -int -mca_ptl_portals_add_procs_compat(struct mca_ptl_portals_module_t* ptl, - size_t nprocs, struct ompi_proc_t **procs, - ptl_process_id_t 
**portals_procs) -{ - int nptl_procs = 0; - - /* - * FIXME - XXX - FIXME - * BWB - implicit assumption that cnos procs list will match our - * procs list. Don't know what to do about that... - */ - - nptl_procs = cnos_get_nidpid_map(portals_procs); - if (nptl_procs <= 0) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "cnos_get_nidpid_map() returned %d", nptl_procs); - return OMPI_ERR_FATAL; - } else if (nptl_procs != nprocs) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "nptl_procs != nprocs (%d, %d)", nptl_procs, - nprocs); - return OMPI_ERR_FATAL; - } - - return OMPI_ERR_NOT_IMPLEMENTED; -} diff --git a/ompi/mca/ptl/portals/src/ptl_portals_compat_utcp.c b/ompi/mca/ptl/portals/src/ptl_portals_compat_utcp.c deleted file mode 100644 index 30a4c8857c..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals_compat_utcp.c +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "portals_config.h" - -#include -#include -#include -#include -#include - -#include "ompi/constants.h" -#include "opal/util/output.h" - -#include "ptl_portals.h" -#include "ptl_portals_compat.h" - -#include - -/* how's this for source code diving? 
- find private method for - getting interface */ -extern unsigned int utcp_my_nid(const char *if_str); - -/* these need to be defined, or things get "unhappy" */ -FILE* utcp_api_out; -FILE* utcp_lib_out; - -int -mca_ptl_portals_init(mca_ptl_portals_component_t *comp) -{ - ptl_process_id_t info; - int ret; -#if 0 - FILE *output; - char *tmp; - - asprintf(&tmp, "portals.%d", getpid()); - output = fopen(tmp, "w"); - free(tmp); - - utcp_lib_out = output; - utcp_api_out = output; -#else - utcp_lib_out = stderr; - utcp_api_out = stderr; -#endif - - info.nid = htonl(utcp_my_nid(mca_ptl_portals_component.portals_ifname)); - info.pid = htonl((ptl_pid_t) getpid()); - opal_output_verbose(100, mca_ptl_portals_component.portals_output, - "contact info: %u, %u", ntohl(info.nid), - ntohl(info.pid)); - - ret = mca_pml_base_modex_send(&mca_ptl_portals_component.super.ptlm_version, - &info, sizeof(ptl_process_id_t)); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "mca_pml_base_modex_send failed: %d", ret); - return ret; - } - - /* with the utcp interface, only ever one "NIC" */ - comp->portals_num_modules = 1; - comp->portals_modules = calloc(comp->portals_num_modules, - sizeof(mca_ptl_portals_module_t *)); - if (NULL == comp->portals_modules) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "malloc failed in mca_ptl_portals_init"); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - comp->portals_modules[0] = malloc(sizeof(mca_ptl_portals_module_t)); - if (NULL == comp->portals_modules) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "malloc failed in mca_ptl_portals_init"); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - *(comp->portals_modules[0]) = mca_ptl_portals_module; - - return OMPI_SUCCESS; -} - - -int -mca_ptl_portals_add_procs_compat(struct mca_ptl_portals_module_t* ptl, - size_t nprocs, struct ompi_proc_t **procs, - ptl_process_id_t **portals_procs) -{ - int ret, my_rid; - 
ptl_process_id_t *info; - char *nidmap = NULL; - char *pidmap = NULL; - char *nid_str; - char *pid_str; - const size_t map_size = nprocs * 12 + 1; /* 12 is max length of long in decimal */ - size_t size, i; - char *tmp; - ompi_proc_t* proc_self = ompi_proc_local(); - int max_interfaces; - - /* - * Do all the NID/PID map setup - */ - /* each nid is a int, so need 10 there, plus the : */ - nidmap = malloc(map_size); - pidmap = malloc(map_size); - nid_str = malloc(12 + 1); - pid_str = malloc(12 + 1); - if (NULL == nidmap || NULL == pidmap || NULL == nid_str || NULL == pid_str) - return OMPI_ERROR; - - /* get space for the portals procs list */ - *portals_procs = calloc(nprocs, sizeof(ptl_process_id_t)); - if (NULL == *portals_procs) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "calloc(nprocs, sizeof(ptl_process_id_t)) failed"); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - - for (i = 0 ; i < nprocs ; ++i) { - if (proc_self == procs[i]) my_rid = i; - - ret = mca_pml_base_modex_recv(&mca_ptl_portals_component.super.ptlm_version, - procs[i], (void**) &info, &size); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "mca_pml_base_modex_recv failed: %d", ret); - return ret; - } else if (sizeof(ptl_process_id_t) != size) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "mca_pml_base_modex_recv returned size %d, expected %d", - size, sizeof(ptl_process_id_t)); - return OMPI_ERROR; - } - - if (i == 0) { - snprintf(nidmap, map_size, "%u", ntohl(info->nid)); - snprintf(pidmap, map_size, "%u", ntohl(info->pid)); - } else { - snprintf(nid_str, 12 + 1, ":%u", ntohl(info->nid)); - snprintf(pid_str, 12 + 1, ":%u", ntohl(info->pid)); - strncat(nidmap, nid_str, 12); - strncat(pidmap, pid_str, 12); - } - - /* update my local array of proc structs */ - (*portals_procs)[i].nid = ntohl(info->nid); - (*portals_procs)[i].pid = ntohl(info->pid); - - free(info); - } - - opal_output_verbose(100, 
mca_ptl_portals_component.portals_output, - "my rid: %u", my_rid); - opal_output_verbose(100, mca_ptl_portals_component.portals_output, - "nid map: %s", nidmap); - opal_output_verbose(100, mca_ptl_portals_component.portals_output, - "pid map: %s", pidmap); - opal_output_verbose(100, mca_ptl_portals_component.portals_output, - "iface: %s", - mca_ptl_portals_component.portals_ifname); - - asprintf(&tmp, "PTL_MY_RID=%u", my_rid); - putenv(tmp); - asprintf(&tmp, "PTL_NIDMAP=%s", nidmap); - putenv(tmp); - asprintf(&tmp, "PTL_PIDMAP=%s", pidmap); - putenv(tmp); - asprintf(&tmp, "PTL_IFACE=%s", mca_ptl_portals_component.portals_ifname); - putenv(tmp); - - free(pidmap); - free(nidmap); - free(pid_str); - free(nid_str); - - /* - * Initialize Portals - */ - ret = PtlInit(&max_interfaces); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "PtlInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - - ret = PtlNIInit(PTL_IFACE_DEFAULT, /* interface to initialize */ - PTL_PID_ANY, /* let library assign our pid */ - NULL, /* no desired limits */ - &(ptl->limits), /* save our limits somewhere */ - &(ptl->ni_handle) /* our interface handle */ - ); - if (PTL_OK != ret) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "PtlNIInit failed, returning %d\n", ret); - return OMPI_ERR_FATAL; - } - -#if 0 - PtlNIDebug(ptl->ni_handle, PTL_DBG_ALL | PTL_DBG_NI_ALL); -#endif - - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/ptl/portals/src/ptl_portals_component.c b/ompi/mca/ptl/portals/src/ptl_portals_component.c deleted file mode 100644 index 4639e271d9..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals_component.c +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "portals_config.h" - -#include "ompi/constants.h" - -#include "opal/util/output.h" -#include "opal/threads/threads.h" - -#include "ptl_portals.h" -#include "ptl_portals_compat.h" -#include "ptl_portals_recv.h" -#include "ptl_portals_send.h" - - -mca_ptl_portals_component_t mca_ptl_portals_component = { - { - /* First, the mca_base_module_t struct containing meta - information about the module itself */ - { - /* Indicate that we are a pml v1.0.0 module (which also - implies a specific MCA version) */ - - MCA_PTL_BASE_VERSION_1_0_0, - - "portals", /* MCA module name */ - OMPI_MAJOR_VERSION, /* MCA module major version */ - OMPI_MINOR_VERSION, /* MCA module minor version */ - OMPI_RELEASE_VERSION, /* MCA module release version */ - mca_ptl_portals_component_open, /* module open */ - mca_ptl_portals_component_close /* module close */ - }, - - /* Next the MCA v1.0.0 module meta data */ - - { - /* Whether the module is checkpointable or not */ - - false - }, - - mca_ptl_portals_component_init, - mca_ptl_portals_component_control, - mca_ptl_portals_component_progress, - } -}; - - -static opal_output_stream_t portals_output_stream; -{ - true, /* is debugging */ - 0, /* verbose level */ - 0, /* want syslog */ - 0, /* syslog priority */ - NULL, /* syslog ident */ - NULL, /* prefix */ - true, /* want stdout */ - false, /* want stderr */ - false, /* want file */ - false, /* file append */ - "ptl-portals" /* file suffix */ -}; - - -static inline char* -param_register_string(const char* param_name, - const char* default_value) -{ - char *param_value; - int id = mca_base_param_register_string("ptl", "portals", - param_name, 
NULL, - default_value); - mca_base_param_lookup_string(id, ¶m_value); - return param_value; -} - - -static inline int -param_register_int(const char* param_name, - int default_value) -{ - int id = mca_base_param_register_int("ptl", "portals", param_name, - NULL, default_value); - int param_value = default_value; - mca_base_param_lookup_int(id, ¶m_value); - return param_value; -} - - - -int -mca_ptl_portals_component_open(void) -{ - int i; - - /* initialize state */ - mca_ptl_portals_component.portals_num_modules = 0; - mca_ptl_portals_component.portals_modules = NULL; - - /* initialize objects */ - OBJ_CONSTRUCT(&mca_ptl_portals_component.portals_send_frags, - ompi_free_list_t); - OBJ_CONSTRUCT(&mca_ptl_portals_component.portals_recv_frags, - ompi_free_list_t); - OBJ_CONSTRUCT(&mca_ptl_portals_component.portals_pending_acks, - opal_list_t); - OBJ_CONSTRUCT(&mca_ptl_portals_component.portals_lock, - opal_mutex_t); - - OBJ_CONSTRUCT(&portals_output_stream, opal_output_stream_t); - portals_output_stream.lds_is_debugging = true; - portals_output_stream.lds_want_stdout = true; - portals_output_stream.lds_file_suffix = "ptl-portals"; - - /* register portals module parameters */ -#if PTL_PORTALS_UTCP - mca_ptl_portals_component.portals_ifname = - param_register_string("ifname", "eth0"); -#endif - portals_output_stream.lds_verbose_level = - param_register_int("debug_level", - PTL_PORTALS_DEFAULT_DEBUG_LEVEL); - - mca_ptl_portals_component.portals_free_list_init_num = - param_register_int("free_list_init_num", - PTL_PORTALS_DEFAULT_FREE_LIST_INIT_NUM); - mca_ptl_portals_component.portals_free_list_max_num = - param_register_int("free_list_max_num", - PTL_PORTALS_DEFAULT_FREE_LIST_MAX_NUM); - mca_ptl_portals_component.portals_free_list_inc_num = - param_register_int("free_list_inc_num", - PTL_PORTALS_DEFAULT_FREE_LIST_inc_NUM); - - mca_ptl_portals_module.super.ptl_cache_size = - param_register_int("request_cache_size", - PTL_PORTALS_DEFAULT_REQUEST_CACHE_SIZE); - 
mca_ptl_portals_module.super.ptl_first_frag_size = - param_register_int("first_frag_size", - PTL_PORTALS_DEFAULT_FIRST_FRAG_SIZE); - mca_ptl_portals_module.super.ptl_min_frag_size = - param_register_int("rndv_frag_min_size", - PTL_PORTALS_DEFAULT_RNDV_FRAG_MIN_SIZE); - mca_ptl_portals_module.super.ptl_max_frag_size = - param_register_int("rndv_frag_max_size", - PTL_PORTALS_DEFAULT_RNDV_FRAG_MAX_SIZE); - - mca_ptl_portals_module.first_frag_num_entries = - param_register_int("first_frag_num_entries", - PTL_PORTALS_DEFAULT_FIRST_FRAG_NUM_ENTRIES); - mca_ptl_portals_module.first_frag_entry_size = - param_register_int("first_frag_entry_size", - PTL_PORTALS_DEFAULT_FIRST_FRAG_ENTRY_SIZE); - - mca_ptl_portals_module.eq_sizes[MCA_PTL_PORTALS_EQ_RECV] = - param_register_int("recv_queue_size", - PTL_PORTALS_DEFAULT_RECV_QUEUE_SIZE); - mca_ptl_portals_module.eq_sizes[MCA_PTL_PORTALS_EQ_SEND] = - (param_register_int("send_queue_size", - PTL_PORTALS_DEFAULT_SEND_QUEUE_SIZE)) * 3; - - /* finish with objects */ - asprintf(&(portals_output_stream.lds_prefix), - "ptl: portals (%5d): ", getpid()); - - mca_ptl_portals_component.portals_output = - opal_output_open(&portals_output_stream); - - /* fill in remaining defaults for module data */ - for (i = 0 ; i < MCA_PTL_PORTALS_EQ_SIZE ; ++i) { - mca_ptl_portals_module.eq_handles[i] = PTL_EQ_NONE; - } - - mca_ptl_portals_module.ni_handle = PTL_INVALID_HANDLE; - mca_ptl_portals_module.dropped = 0; - - return OMPI_SUCCESS; -} - - -int -mca_ptl_portals_component_close(void) -{ - /* print out debugging if anything is pending */ - /* BWB - implement me, if possible */ - - /* release resources */ - OBJ_DESTRUCT(&mca_ptl_portals_component.portals_lock); - OBJ_DESTRUCT(&mca_ptl_portals_component.portals_recv_frags); - OBJ_DESTRUCT(&mca_ptl_portals_component.portals_pending_acks); - OBJ_DESTRUCT(&mca_ptl_portals_component.portals_lock); - - if (NULL != mca_ptl_portals_component.portals_ifname) { - free(mca_ptl_portals_component.portals_ifname); - 
} - - if (NULL != portals_output_stream.lds_prefix) { - portals_output_stream.lds_prefix = NULL; - } - - opal_output_close(mca_ptl_portals_component.portals_output); - mca_ptl_portals_component.portals_output = -1; - - return OMPI_SUCCESS; -} - - -mca_ptl_base_module_t** -mca_ptl_portals_component_init(int *num_ptls, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - mca_ptl_base_module_t** ptls; - *num_ptls = 0; - - if (enable_progress_threads) { - opal_output_verbose(20, mca_ptl_portals_component.portals_output, - "disabled because progress threads enabled"); - return NULL; - } - - ompi_free_list_init(&mca_ptl_portals_component.portals_send_frags, - sizeof(mca_ptl_portals_send_frag_t), - OBJ_CLASS(mca_ptl_portals_send_frag_t), - mca_ptl_portals_component.portals_free_list_init_num, - mca_ptl_portals_component.portals_free_list_max_num, - mca_ptl_portals_component.portals_free_list_inc_num, - NULL); /* use default allocator */ - - ompi_free_list_init(&mca_ptl_portals_component.portals_recv_frags, - sizeof(mca_ptl_portals_recv_frag_t), - OBJ_CLASS(mca_ptl_portals_recv_frag_t), - mca_ptl_portals_component.portals_free_list_init_num, - mca_ptl_portals_component.portals_free_list_max_num, - mca_ptl_portals_component.portals_free_list_inc_num, - NULL); /* use default allocator */ - - /* initialize portals ptl. 
note that this is in the compat code because - it's fairly non-portable between implementations */ - if (OMPI_SUCCESS != mca_ptl_portals_init(&mca_ptl_portals_component)) { - opal_output_verbose(20, mca_ptl_portals_component.portals_output, - "disabled because compatibility init failed"); - return NULL; - } - - /* return array of ptls */ - ptls = malloc(mca_ptl_portals_component.portals_num_modules * - sizeof(mca_ptl_base_module_t*)); - if (NULL == ptls) return NULL; - - memcpy(ptls, - mca_ptl_portals_component.portals_modules, - mca_ptl_portals_component.portals_num_modules * - sizeof(mca_ptl_base_module_t*)); - *num_ptls = mca_ptl_portals_component.portals_num_modules; - - opal_output_verbose(20, mca_ptl_portals_component.portals_output, - "initialized %d modules", - *num_ptls); - - return ptls; -} - - -int -mca_ptl_portals_component_control(int param, void* value, size_t size) -{ - uint32_t i; - int ret = OMPI_SUCCESS; - - opal_output_verbose(30, mca_ptl_portals_component.portals_output, - "component control: %d, %d", - param, (*(int*) value)); - - switch(param) { - case MCA_PTL_ENABLE: - for (i = 0 ; - i < mca_ptl_portals_component.portals_num_modules ; - ++i) { - ret = mca_ptl_portals_module_enable( - mca_ptl_portals_component.portals_modules[i], - *(int*)value); - if (ret != OMPI_SUCCESS) break; - } - break; - default: - break; - } - return ret; -} - - -int -mca_ptl_portals_component_progress(mca_ptl_tstamp_t tstamp) -{ - int num_progressed = 0; - size_t i; - tstamp = 10; - - for (i = 0 ; i < mca_ptl_portals_component.portals_num_modules ; ++i) { - struct mca_ptl_portals_module_t *module = - mca_ptl_portals_component.portals_modules[i]; - ptl_event_t ev; - ptl_sr_value_t numdropped; - int which; - int ret; - - if (module->eq_handles[MCA_PTL_PORTALS_EQ_SIZE - 1] == - PTL_EQ_NONE) continue; /* they are all initialized at once */ - -#if OMPI_ENABLE_DEBUG - /* BWB - this is going to kill performance */ - PtlNIStatus(module->ni_handle, - PTL_SR_DROP_COUNT, - 
&numdropped); - if (numdropped != module->dropped) { - opal_output_verbose(30, mca_ptl_portals_component.portals_output, - "*** Dropped message count changed. %lld, %lld", - module->dropped, numdropped); - module->dropped = numdropped; - } -#endif - - ret = PtlEQPoll(module->eq_handles, - MCA_PTL_PORTALS_EQ_SIZE, /* number of eq handles */ - (int) tstamp, - &ev, - &which); - if (PTL_EQ_EMPTY == ret) { - /* nothing to see here - move along */ - continue; - } else if (!(PTL_OK == ret || PTL_EQ_DROPPED == ret)) { - /* BWB - how can we report errors? */ - opal_output(mca_ptl_portals_component.portals_output, - "Error calling PtlEQGet: %d", ret); - continue; - } else if (PTL_EQ_DROPPED == ret) { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "*** Event queue entries were dropped"); - } - -#if PTL_PORTALS_HAVE_EVENT_UNLINK - /* not everyone has UNLINK. Use it only to print the event, - so we can make sure we properly re-initialize the ones that - need to be re-initialized */ - if (PTL_EVENT_UNLINK == ev.type) { - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "unlink event occurred")); - continue; - } -#endif - - if (ev.md.user_ptr == NULL) { - /* no fragment associated with it - it's a receive */ - assert(which == MCA_PTL_PORTALS_EQ_RECV); - mca_ptl_portals_process_recv_event(module, &ev); - } else { - /* there's a fragment associated with it - choose based on - frag type */ - mca_ptl_base_frag_t *frag = - (mca_ptl_base_frag_t*) ev.md.user_ptr; - if (frag->frag_type == MCA_PTL_FRAGMENT_SEND) { - assert(which == MCA_PTL_PORTALS_EQ_SEND); - mca_ptl_portals_process_send_event(&ev); - } else { - assert(which == MCA_PTL_PORTALS_EQ_RECV); - mca_ptl_portals_process_recv_event(module, &ev); - } - } - - num_progressed++; - } - - return num_progressed; -} - diff --git a/ompi/mca/ptl/portals/src/ptl_portals_recv.c b/ompi/mca/ptl/portals/src/ptl_portals_recv.c deleted file mode 100644 index 1432db99bb..0000000000 --- 
a/ompi/mca/ptl/portals/src/ptl_portals_recv.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "portals_config.h" - -#include "ptl_portals.h" -#include "ptl_portals_compat.h" -#include "ptl_portals_recv.h" -#include "ptl_portals_send.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" - - -OBJ_CLASS_INSTANCE(mca_ptl_portals_recv_frag_t, - mca_ptl_base_recv_frag_t, - NULL, NULL); - - -int -ptl_portals_post_recv_md(struct mca_ptl_portals_module_t *ptl, void *data_ptr) -{ - ptl_handle_me_t me_handle; - ptl_handle_md_t md_handle; - ptl_md_t md; - void *mem; - int ret; - ptl_process_id_t proc = { PTL_NID_ANY, PTL_PID_ANY }; - - /* create match entry */ - ret = PtlMEAttach(ptl->ni_handle, - PTL_PORTALS_FRAG_TABLE_ID, - proc, - 0, /* match bits */ - 0, /* ignore bits */ - PTL_UNLINK, - PTL_INS_AFTER, - &me_handle); - if (PTL_OK != ret) return OMPI_ERROR; - - if (NULL == data_ptr) { - /* and some memory */ - mem = malloc(ptl->first_frag_entry_size); - if (NULL == mem) { - PtlMEUnlink(me_handle); - return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - } - } else { - /* re-use the memory from the (now unlinked) memory descriptor */ - mem = data_ptr; - } - - /* and the memory descriptor */ - md.start = mem; - md.length = ptl->first_frag_entry_size; - md.threshold = PTL_MD_THRESH_INF; - md.max_size = ptl->super.ptl_first_frag_size; - md.options = PTL_MD_OP_PUT | PTL_MD_MAX_SIZE; - 
md.user_ptr = NULL; - md.eq_handle = ptl->eq_handles[MCA_PTL_PORTALS_EQ_RECV]; - - ret = PtlMDAttach(me_handle, - md, - PTL_UNLINK, - &md_handle); - if (PTL_OK != ret) { - PtlMEUnlink(me_handle); - return OMPI_ERROR; - } - - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "new receive buffer posted")); - - return OMPI_SUCCESS; -} - - -int -mca_ptl_portals_process_recv_event(struct mca_ptl_portals_module_t *ptl, - ptl_event_t *ev) -{ - int ret; - - if (ev->type == PTL_EVENT_PUT_START) { - OPAL_OUTPUT_VERBOSE((101, mca_ptl_portals_component.portals_output, - "starting to receive message", ev->link)); - } else if (ev->type == PTL_EVENT_PUT_END) { - mca_ptl_base_header_t *hdr; - - OPAL_OUTPUT_VERBOSE((101, mca_ptl_portals_component.portals_output, - "message %ld received, start: %p, mlength: %lld," - " offset: %lld", - ev->link, ev->md.start, ev->mlength, ev->offset)); - - /* buffer is going to be header followed by data */ - hdr = (mca_ptl_base_header_t*) (((char*) ev->md.start) + ev->offset); - switch (hdr->hdr_common.hdr_type) { - - case MCA_PTL_HDR_TYPE_MATCH: - ret = mca_ptl_portals_process_first_frag(ptl, hdr, ev, - sizeof(mca_ptl_base_match_header_t)); - if (OMPI_SUCCESS != ret) return ret; - break; - - case MCA_PTL_HDR_TYPE_RNDV: - ret = mca_ptl_portals_process_first_frag(ptl, hdr, ev, - sizeof(mca_ptl_base_rendezvous_header_t)); - if (OMPI_SUCCESS != ret) return ret; - break; - - case MCA_PTL_HDR_TYPE_FRAG: - ret = mca_ptl_portals_process_frag_frag(ptl, hdr, ev); - if (OMPI_SUCCESS != ret) return ret; - break; - - case MCA_PTL_HDR_TYPE_ACK: - { - mca_ptl_portals_send_frag_t *sendfrag; - mca_ptl_base_send_request_t *sendreq; - - sendfrag = hdr->hdr_ack.hdr_src_ptr.pval; - sendreq = sendfrag->frag_send.frag_request; - sendreq->req_peer_match = hdr->hdr_ack.hdr_dst_match; - - OPAL_OUTPUT_VERBOSE((100, - mca_ptl_portals_component.portals_output, - "received ack for recv request %p (msg %d)", - hdr->hdr_ack.hdr_dst_match, - 
sendreq->req_send.req_base.req_sequence)); - - mca_ptl_portals_complete_send_event(sendfrag); - } - break; - - default: - opal_output(mca_ptl_portals_component.portals_output, - "*** unable to deal with header of type %d", - hdr->hdr_common.hdr_type); - break; - } - - if (ev->md.length - (ev->offset + ev->mlength) < ev->md.max_size) { - /* use the same memory as the old md - it's not using it anymore */ - ret = ptl_portals_post_recv_md(ptl, ev->md.start); - if (OMPI_SUCCESS != ret) { - opal_output(mca_ptl_portals_component.portals_output, - "failed to allocate receive memory descriptor"); - /* BWB - ok, what do I do now? */ - } - } - - } else { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "*** unknown event: %d (%ld)", - ev->type, ev->link); - } - - return OMPI_SUCCESS; -} - - -void -mca_ptl_portals_matched(struct mca_ptl_base_module_t *ptl_base, - struct mca_ptl_base_recv_frag_t *frag_base) -{ - mca_ptl_base_header_t* hdr = &frag_base->frag_base.frag_header; - mca_ptl_base_recv_request_t* request = frag_base->frag_request; - mca_ptl_portals_module_t* ptl = (mca_ptl_portals_module_t*) ptl_base; - mca_ptl_portals_recv_frag_t* recvfrag = (mca_ptl_portals_recv_frag_t*) frag_base; - size_t bytes_delivered = recvfrag->frag_size; - - /* generate an acknowledgment if required */ - if(hdr->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK) { - mca_ptl_portals_send_ack(ptl, recvfrag); - } - - /* can just use the convertor straight from the request - no need - to initialize. 
Might want to personalize, if required */ - - /* copy data into users buffer */ - if(recvfrag->frag_size > 0) { - struct iovec iov; - unsigned int iov_count = 1; - int free_after = 0; - ompi_convertor_t *convertor = &(request->req_recv.req_convertor); - - iov.iov_base = recvfrag->frag_data; - iov.iov_len = recvfrag->frag_size; - ompi_convertor_unpack(convertor, &iov, &iov_count, &bytes_delivered, &free_after ); - } - - /* update request status */ - ptl->super.ptl_recv_progress(&ptl->super, - request, - recvfrag->frag_size, - bytes_delivered); - - /* release resources */ -#if 0 - if(ack_pending == false) - MCA_PTL_PORTALS_RECV_FRAG_RETURN(recvfrag); -#endif - return; -} diff --git a/ompi/mca/ptl/portals/src/ptl_portals_recv.h b/ompi/mca/ptl/portals/src/ptl_portals_recv.h deleted file mode 100644 index 192376d5fb..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals_recv.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PTL_PORTALS_RECV_FRAG_H -#define MCA_PTL_PORTALS_RECV_FRAG_H - -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" - -/** - * PORTALS received fragment derived type. 
- */ -struct mca_ptl_portals_recv_frag_t { - mca_ptl_base_recv_frag_t frag_recv; /**< base receive fragment descriptor */ - void *frag_data; - size_t frag_size; - ptl_process_id_t frag_source; -}; -typedef struct mca_ptl_portals_recv_frag_t mca_ptl_portals_recv_frag_t; - -OBJ_CLASS_DECLARATION(mca_ptl_portals_recv_frag_t); - - -int ptl_portals_post_recv_md(struct mca_ptl_portals_module_t *ptl, - void *data_ptr); -int mca_ptl_portals_process_recv_event(struct mca_ptl_portals_module_t *ptl, - ptl_event_t *ev); - -static inline mca_ptl_portals_recv_frag_t * -mca_ptl_portals_recv_get_frag(struct mca_ptl_portals_module_t *ptl, - mca_ptl_base_header_t *hdr, - ptl_event_t *ev, - size_t header_size) -{ - mca_ptl_portals_recv_frag_t * recvfrag; - opal_list_item_t *item; - int ret; - - /* get a fragment header */ - OMPI_FREE_LIST_GET(&mca_ptl_portals_component.portals_recv_frags, - item, ret); - recvfrag = (mca_ptl_portals_recv_frag_t*) item; - if (OMPI_SUCCESS != ret) { - opal_output(mca_ptl_portals_component.portals_output, - "unable to allocate resources"); - return NULL; - } - - /* save the sender */ - recvfrag->frag_source = ev->initiator; - - recvfrag->frag_data = ((char*) hdr) + header_size; - recvfrag->frag_size = ev->mlength - header_size; - memcpy(&(recvfrag->frag_recv.frag_base.frag_header), - hdr, header_size); - recvfrag->frag_recv.frag_base.frag_owner = &(ptl->super); - recvfrag->frag_recv.frag_base.frag_peer = NULL; /* BWB - fix me */ - recvfrag->frag_recv.frag_base.frag_size = 0; - recvfrag->frag_recv.frag_base.frag_addr = recvfrag->frag_data; - recvfrag->frag_recv.frag_is_buffered = true; - - return recvfrag; -} - - -static inline int -mca_ptl_portals_process_first_frag(struct mca_ptl_portals_module_t *ptl, - mca_ptl_base_header_t *hdr, - ptl_event_t *ev, - size_t header_size) -{ - mca_ptl_portals_recv_frag_t *recvfrag; - - recvfrag = mca_ptl_portals_recv_get_frag(ptl, hdr, ev, header_size); - if (NULL == recvfrag) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - - 
OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "recving first frag of size %d for msg %d from %lu", - recvfrag->frag_size, - (int) hdr->hdr_match.hdr_msg_seq, - ev->initiator.pid)); - - recvfrag->frag_recv.frag_request = NULL; - ptl->super.ptl_match(&ptl->super, &recvfrag->frag_recv, - &hdr->hdr_match); - - return OMPI_SUCCESS; -} - -static inline int -mca_ptl_portals_process_frag_frag(struct mca_ptl_portals_module_t *ptl, - mca_ptl_base_header_t *hdr, - ptl_event_t *ev) -{ - size_t bytes_delivered; - mca_ptl_base_recv_request_t* request; - mca_ptl_portals_recv_frag_t *recvfrag; - - /* get a frag and fill it in */ - recvfrag = mca_ptl_portals_recv_get_frag(ptl, hdr, ev, - sizeof(mca_ptl_base_frag_header_t)); - if (NULL == recvfrag) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; - - recvfrag->frag_recv.frag_request = hdr->hdr_frag.hdr_dst_ptr.pval; - bytes_delivered = recvfrag->frag_size; - request = recvfrag->frag_recv.frag_request; - - if (recvfrag->frag_size > 0) { - struct iovec iov; - unsigned int iov_count = 1; - int free_after = 0; - ompi_convertor_t* convertor = - &(recvfrag->frag_recv.frag_base.frag_convertor); - - /* clone receive convertor and set to correct position */ - ompi_convertor_clone_with_position(&(request->req_recv.req_convertor), - convertor, 1, - &(hdr->hdr_frag.hdr_frag_offset)); - - iov.iov_base = recvfrag->frag_data; - iov.iov_len = recvfrag->frag_size; - ompi_convertor_unpack(convertor, &iov, &iov_count, - &bytes_delivered, &free_after ); - } - - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "recving secnd frag of size %d for msg %d, offset %lld from %lu, %p", - recvfrag->frag_size, - (int) hdr->hdr_match.hdr_msg_seq, - hdr->hdr_frag.hdr_frag_offset, - ev->initiator.pid, - request)); - - /* update request status */ - ptl->super.ptl_recv_progress(&ptl->super, - request, - recvfrag->frag_size, - bytes_delivered); - - return OMPI_SUCCESS; -} - -#endif diff --git 
a/ompi/mca/ptl/portals/src/ptl_portals_send.c b/ompi/mca/ptl/portals/src/ptl_portals_send.c deleted file mode 100644 index 5bb5f0dd2e..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals_send.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "portals_config.h" - -#include "ptl_portals.h" -#include "ptl_portals_compat.h" -#include "ptl_portals_send.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" - -static void mca_ptl_portals_send_frag_construct(mca_ptl_portals_send_frag_t* frag); -static void mca_ptl_portals_send_frag_destruct(mca_ptl_portals_send_frag_t* frag); - -OBJ_CLASS_INSTANCE(mca_ptl_portals_send_frag_t, - mca_ptl_base_send_frag_t, - mca_ptl_portals_send_frag_construct, - mca_ptl_portals_send_frag_destruct); - -static void -mca_ptl_portals_send_frag_construct(mca_ptl_portals_send_frag_t* frag) -{ - frag->frag_vector[0].iov_base = &(frag->frag_send.frag_base.frag_header); - frag->frag_vector[0].iov_len = sizeof(mca_ptl_base_header_t); -} - - -static void -mca_ptl_portals_send_frag_destruct(mca_ptl_portals_send_frag_t* frag) -{ -} - - -int -mca_ptl_portals_send(struct mca_ptl_base_module_t *ptl_base, - struct mca_ptl_base_peer_t *ptl_peer, - struct mca_ptl_base_send_request_t *sendreq, - size_t offset, size_t size, int flags) -{ - mca_ptl_portals_module_t* ptl = (mca_ptl_portals_module_t*) ptl_base; - ptl_process_id_t 
*peer_id = (ptl_process_id_t*) ptl_peer; - mca_ptl_portals_send_frag_t* sendfrag; - mca_ptl_base_header_t* hdr; - int ret; - - if (sendreq->req_cached && offset == 0) { - sendfrag = (mca_ptl_portals_send_frag_t*)(sendreq+1); - } else { - opal_list_item_t *item; - OMPI_FREE_LIST_GET(&mca_ptl_portals_component.portals_send_frags, - item, ret); - if (NULL == item) return ret; - sendfrag = (mca_ptl_portals_send_frag_t *) item; - } - - /* initialize convertor */ - if (size > 0) { - ompi_convertor_t *convertor; - struct iovec iov; - unsigned int iov_count; - size_t max_data; - int rc; - - /* BWB - first frag, only need to call pack. Can call - personalize if we want to change options. On second frag, - need to clone with ompi_convertor_clone_with_position(), - then repersonalize. In either case, no need to free the - convertors or anything like that. Look in base_fragment - - there's a convertor in there.*/ - - if (offset == 0) { - convertor = &sendreq->req_send.req_convertor; - } else { - convertor = &sendfrag->frag_send.frag_base.frag_convertor; - ompi_convertor_clone_with_position(&sendreq->req_send.req_convertor, - convertor, 1, &offset); - } - - /* if data is contigous convertor will return an offset - * into users buffer - otherwise will return an allocated buffer - * that holds the packed data - */ - iov.iov_base = NULL; - iov.iov_len = size; - iov_count = 1; - max_data = size; - if((rc = ompi_convertor_pack( - convertor, - &iov, - &iov_count, - &max_data, - &(sendfrag->free_data))) < 0) { - return OMPI_ERROR; - } - sendfrag->frag_vector[1].iov_base = iov.iov_base; - sendfrag->frag_vector[1].iov_len = iov.iov_len; - sendfrag->frag_send.frag_base.frag_addr = iov.iov_base; - sendfrag->frag_send.frag_base.frag_size = iov.iov_len; - } else { - sendfrag->frag_send.frag_base.frag_addr = NULL; - sendfrag->frag_send.frag_base.frag_size = 0; - } - - /* setup message header */ - hdr = &sendfrag->frag_send.frag_base.frag_header; - - /* first frag - needs all matching */ - if 
(offset == 0) { - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; - - /* if an acknoweldgment is not required - can get by with a - shorter header */ - if ((flags & MCA_PTL_FLAGS_ACK) == 0) { - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH; - sendfrag->frag_vector[0].iov_len = sizeof(mca_ptl_base_match_header_t); - } else { - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_RNDV; - hdr->hdr_rndv.hdr_frag_length = sendfrag->frag_send.frag_base.frag_size; - hdr->hdr_rndv.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_rndv.hdr_src_ptr.pval = sendfrag; - sendfrag->frag_vector[0].iov_len = sizeof(mca_ptl_base_rendezvous_header_t); - } - - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "sending first frag of size %d for msg %lld to %lu", - sendfrag->frag_send.frag_base.frag_size, - sendreq->req_send.req_base.req_sequence, - peer_id->pid)); - - } else { - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; - sendfrag->frag_vector[0].iov_len = sizeof(mca_ptl_base_frag_header_t); - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_frag.hdr_frag_offset = offset; - hdr->hdr_frag.hdr_frag_length = sendfrag->frag_send.frag_base.frag_size; - hdr->hdr_frag.hdr_dst_ptr = sendreq->req_peer_match; - - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "sending secnd frag of size %d for msg %lld, offset %lld to %lu, %p", - sendfrag->frag_send.frag_base.frag_size, - sendreq->req_send.req_base.req_sequence, - hdr->hdr_frag.hdr_frag_offset, - peer_id->pid, - sendreq->req_peer_match)); - - 
sendfrag->frag_send.frag_base.frag_size = size; - } - - /* fragment state */ - sendfrag->frag_send.frag_base.frag_owner = ptl_base; - sendfrag->frag_send.frag_request = sendreq; - sendfrag->frag_send.frag_base.frag_peer = ptl_peer; - - /* must update the offset after actual fragment size is determined - * before attempting to send the fragment - */ - mca_ptl_base_send_request_offset(sendreq, - sendfrag->frag_send.frag_base.frag_size); - - return mca_ptl_portals_send_frag(ptl, sendfrag); -} - - -int -mca_ptl_portals_process_send_event(ptl_event_t *ev) -{ - mca_ptl_portals_send_frag_t* frag = - (mca_ptl_portals_send_frag_t*) ev->md.user_ptr; - mca_ptl_base_header_t* hdr = - &(frag->frag_send.frag_base.frag_header); - - if (ev->type == PTL_EVENT_SEND_START) { - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "ptl event send start for msg %d", - (int) hdr->hdr_match.hdr_msg_seq)); - } else if (ev->type == PTL_EVENT_SEND_END) { - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "ptl event send end for msg %d", - (int) hdr->hdr_match.hdr_msg_seq)); - } else if (ev->type == PTL_EVENT_ACK) { - - if (frag->frag_send.frag_request == NULL) { - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "done sending ack for recv request %p to %lu", - hdr->hdr_ack.hdr_dst_match.pval, - ev->initiator.pid)); - assert(MCA_PTL_HDR_TYPE_ACK == hdr->hdr_common.hdr_type); - - /* if request is NULL, it's an ACK - just return the frag - to the pool */ - OMPI_FREE_LIST_RETURN(&mca_ptl_portals_component.portals_send_frags, - (opal_list_item_t*) frag); - } else { - bool frag_ack; - -#if OMPI_ENABLE_DEBUG - if (MCA_PTL_HDR_TYPE_MATCH == hdr->hdr_common.hdr_type || - MCA_PTL_HDR_TYPE_RNDV == hdr->hdr_common.hdr_type) { - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "done sending first frag for msg %d to %lu", - (int) hdr->hdr_match.hdr_msg_seq, - ev->initiator.pid)); - } else if (MCA_PTL_HDR_TYPE_FRAG == 
hdr->hdr_common.hdr_type) { - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "done sending secnd frag to req %p, offset %lld", - hdr->hdr_frag.hdr_dst_ptr.pval, - hdr->hdr_frag.hdr_frag_offset)); - } else { - opal_output(mca_ptl_portals_component.portals_output, - "unexpected send event hdr type: %d. aborting", - hdr->hdr_common.hdr_type); - abort(); - } -#endif - - /* it's a completion of a data fragment */ - frag_ack = (hdr->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK) ? - true : false; - - if (frag_ack == false) { - /* data frag is done and we aren't waiting on an ack. - complete it. if waiting for an ack, will be - completed when process_recv_event sees an ack */ - mca_ptl_portals_complete_send_event(frag); - } - } - - /* unlink memory descriptor */ - PtlMDUnlink(ev->md_handle); - } else { - opal_output_verbose(10, mca_ptl_portals_component.portals_output, - "*** Unknown event for msg %d: %d", - (int) hdr->hdr_match.hdr_msg_seq, ev->type); - } - - return OMPI_SUCCESS; -} diff --git a/ompi/mca/ptl/portals/src/ptl_portals_send.h b/ompi/mca/ptl/portals/src/ptl_portals_send.h deleted file mode 100644 index 96252a575a..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals_send.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#ifndef MCA_PTL_PORTALS_SENDFRAG_H_ -#define MCA_PTL_PORTALS_SENDFRAG_H_ - -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ptl_portals_recv.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - struct mca_ptl_portals_send_frag_t { - mca_ptl_base_send_frag_t frag_send; - ptl_md_iovec_t frag_vector[2]; - int32_t free_data; - }; - typedef struct mca_ptl_portals_send_frag_t mca_ptl_portals_send_frag_t; - OBJ_CLASS_DECLARATION (mca_ptl_portals_send_frag_t); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -static inline int -mca_ptl_portals_send_frag(struct mca_ptl_portals_module_t *ptl, - mca_ptl_portals_send_frag_t* sendfrag) -{ - ptl_md_t md; - ptl_handle_md_t md_handle; - int ret; - - /* setup the send and go */ - md.start = sendfrag->frag_vector; - md.length = 2; /* header + data */ - md.threshold = PTL_MD_THRESH_INF; /* unlink based on protocol */ - md.max_size = 0; - md.options = PTL_MD_IOVEC; /* BWB - can we optimize? 
*/ - md.user_ptr = sendfrag; - md.eq_handle = ptl->eq_handles[MCA_PTL_PORTALS_EQ_SEND]; - - /* make a free-floater */ - ret = PtlMDBind(ptl->ni_handle, - md, - PTL_UNLINK, - &md_handle); - if (ret != PTL_OK) { - opal_output(mca_ptl_portals_component.portals_output, - "PtlMDBind failed with error %d", ret); - return OMPI_ERROR; - } - - ret = PtlPut(md_handle, - PTL_ACK_REQ, - *((ptl_process_id_t*) sendfrag->frag_send.frag_base.frag_peer), - PTL_PORTALS_FRAG_TABLE_ID, - 0, /* ac_index */ - 0, /* match bits */ - 0, /* remote offset - not used */ - 0); /* hdr_data - not used */ - if (ret != PTL_OK) { - opal_output(mca_ptl_portals_component.portals_output, - "PtlPut failed with error %d", ret); - PtlMDUnlink(md_handle); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - - -static inline int -mca_ptl_portals_send_ack(struct mca_ptl_portals_module_t *ptl, - mca_ptl_portals_recv_frag_t* recvfrag) -{ - mca_ptl_base_header_t* hdr; - mca_ptl_portals_send_frag_t* sendfrag; - opal_list_item_t *item; - mca_ptl_base_recv_request_t* request = recvfrag->frag_recv.frag_request; - int ret; - - /* get a fragment */ - OMPI_FREE_LIST_GET(&mca_ptl_portals_component.portals_send_frags, - item, ret); - if (NULL == item) return ret; - sendfrag = (mca_ptl_portals_send_frag_t *) item; - - /* no payload */ - sendfrag->frag_vector[1].iov_base = NULL; - sendfrag->frag_vector[1].iov_len = 0; - - /* setup message header */ - hdr = &sendfrag->frag_send.frag_base.frag_header; - - hdr->hdr_ack.hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK; - hdr->hdr_ack.hdr_common.hdr_flags = 0; - -#if OMPI_ENABLE_MEM_DEBUG - hdr->hdr_ack.hdr_dst_match.lval = 0; - hdr->hdr_ack.hdr_dst_addr.lval = 0; -#endif - - hdr->hdr_ack.hdr_src_ptr = - recvfrag->frag_recv.frag_base.frag_header.hdr_rndv.hdr_src_ptr; - hdr->hdr_ack.hdr_dst_match.pval = request; - hdr->hdr_ack.hdr_dst_addr.pval = request->req_recv.req_base.req_addr; - hdr->hdr_ack.hdr_dst_size = request->req_recv.req_bytes_packed; - - /* can ignore most of the 
fragment, but need to make sure the - request is NULL so that process_send_event knows it's an ack - completing */ - sendfrag->frag_send.frag_request = NULL; - sendfrag->frag_send.frag_base.frag_peer = - (struct mca_ptl_base_peer_t*) &(recvfrag->frag_source); - - sendfrag->frag_vector[0].iov_len = sizeof(mca_ptl_base_ack_header_t); - - OPAL_OUTPUT_VERBOSE((100, mca_ptl_portals_component.portals_output, - "sending ack for recv request %p", request)); - - return mca_ptl_portals_send_frag(ptl, sendfrag); -} - - -static inline void -mca_ptl_portals_complete_send_event(mca_ptl_portals_send_frag_t* frag) -{ - frag->frag_send.frag_base.frag_owner-> - ptl_send_progress(frag->frag_send.frag_base.frag_owner, - frag->frag_send.frag_request, - frag->frag_send.frag_base.frag_size); - - /* return frag to freelist if not part of request */ - if (frag->frag_send.frag_request->req_cached == false || - frag->frag_send.frag_base.frag_header.hdr_common.hdr_type == - MCA_PTL_HDR_TYPE_FRAG) { - - if (frag->free_data) { - free(frag->frag_vector[1].iov_base); - } - OMPI_FREE_LIST_RETURN(&mca_ptl_portals_component.portals_send_frags, - (opal_list_item_t*) frag); - } -} - -#endif /* MCA_PTL_PORTALS_SENDFRAG_H_ */ diff --git a/ompi/mca/ptl/portals/src/ptl_portals_stubs.c b/ompi/mca/ptl/portals/src/ptl_portals_stubs.c deleted file mode 100644 index d030b486b0..0000000000 --- a/ompi/mca/ptl/portals/src/ptl_portals_stubs.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "portals_config.h" - -#include "ptl_portals.h" - -/* BWB - README - BWB - README - BWB - README - BWB - README - BWB - * - * These are stub functions that return error so that the - * initialization code can be developed and the whole thing will - * link. This file will disappear once all functions are - * implemented. Do not implement any functions in this file. - * - * BWB - README - BWB - README - BWB - README - BWB - README - BWB */ - - diff --git a/ompi/mca/ptl/prof/.ompi_ignore b/ompi/mca/ptl/prof/.ompi_ignore deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ompi/mca/ptl/prof/Makefile.am b/ompi/mca/ptl/prof/Makefile.am deleted file mode 100644 index 89d3b98207..0000000000 --- a/ompi/mca/ptl/prof/Makefile.am +++ /dev/null @@ -1,60 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level OpenMPI Makefile.options - - - -# According to the MCA spec, we have to make the output library here -# in the top-level directory, and it has to be named -# libompi_ssi_coll_ompi_basic.la - -AM_CPPFLAGS = \ - -I$(top_ompi_builddir)/include \ - -I$(top_ompi_builddir)/src/include \ - -I$(top_ompi_builddir)/src/ompi/event \ - -I$(top_ompi_srcdir)/src \ - -I$(top_ompi_srcdir)/src/include - -prof_la_SOURCES = ptl_prof.c ptl_prof.h \ - ptl_prof_component.c - -if OMPI_BUILD_ptl_prof_DSO -component_noinst = -component_install = mca_ptl_prof.la -else -component_noinst = libmca_ptl_prof.la -component_install = -endif - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) -mca_ptl_prof_la_SOURCES = $(prof_la_SOURCES) -mca_ptl_prof_la_LIBADD = \ - $(LIBOMPI_LA) \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la - -mca_ptl_prof_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_ptl_prof_la_SOURCES = $(prof_la_SOURCES) -libmca_ptl_prof_la_LIBADD = -libmca_ptl_prof_la_LDFLAGS = -module -avoid-version - diff --git a/ompi/mca/ptl/prof/configure.params b/ompi/mca/ptl/prof/configure.params deleted file mode 100644 index 6ebe53a276..0000000000 --- a/ompi/mca/ptl/prof/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=ptl_prof.c -PARAM_CONFIG_HEADER_FILE="prof_config.h" -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/ptl/prof/ptl_prof.c b/ompi/mca/ptl/prof/ptl_prof.c deleted file mode 100644 index fac8b1b97d..0000000000 --- a/ompi/mca/ptl/prof/ptl_prof.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ptl_prof.h" -#include "ompi/class/ompi_bitmap.h" - -static int ptl_prof_add_procs_fn( struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_ptl_base_peer_t** peer, - struct ompi_bitmap_t* reachable ) -{ - return 0; -} - -static int ptl_prof_del_procs_fn( struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_ptl_base_peer_t** peer ) -{ - return 0; -} - -static int ptl_prof_finalize_fn( struct mca_ptl_base_module_t* ptl ) -{ - return 0; -} - -static int ptl_prof_send_fn( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_pml_base_send_request_t* request, - size_t offset, - size_t size, - int flags ) -{ - return 0; -} - -static int ptl_prof_put_fn( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_pml_base_send_request_t* request, - size_t offset, - size_t size, - int flags ) -{ - return 0; -} 
- -static int ptl_prof_get_fn( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_pml_base_recv_request_t* request, - size_t offset, - size_t size, - int flags ) -{ - return 0; -} - -static void ptl_prof_matched_fn( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* request ) -{ -} - -static int ptl_prof_request_init_fn( struct mca_ptl_base_module_t* ptl, - struct mca_pml_base_send_request_t* request ) -{ - return 0; -} - -static void ptl_prof_request_fini_fn( struct mca_ptl_base_module_t* ptl, struct mca_pml_base_send_request_t* request ) -{ -} - -static bool ptl_prof_match_fn( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* recv_frag, - struct mca_ptl_base_match_header_t* header ) -{ - return true; -} - -static void ptl_prof_send_progress_fn( struct mca_ptl_base_module_t* ptl, - struct mca_pml_base_send_request_t* send_request, - size_t bytes_sent ) -{ -} - -static void ptl_prof_recv_progress_fn( struct mca_ptl_base_module_t* ptl, - struct mca_pml_base_recv_request_t* recv_request, - size_t bytes_received, - size_t bytes_delivered ) -{ -} - -/* The default profiling PTL. We will canibalize all others PTL - * except this one. It's just a simple way to have the control function - * called. 
- */ -mca_ptl_prof_t mca_ptl_prof = { - { NULL, - 0, /* maximum size of request cache for this PTL */ - 0, /* number of bytes required by PTL for request cache */ - 0, /* ptl_frag_first_size */ - 0, /* ptl_frag_min_size */ - 0, /* ptl_frag_max_size */ - 0, /* ptl_exclusivity */ - 0, /* ptl_latency */ - 0, /* ptl_bandwidth */ - MCA_PTL_PUT | MCA_PTL_GET, /* ptl flags */ - ptl_prof_add_procs_fn, - ptl_prof_del_procs_fn, - ptl_prof_finalize_fn, - ptl_prof_send_fn, - ptl_prof_put_fn, - ptl_prof_get_fn, - ptl_prof_matched_fn, - ptl_prof_request_init_fn, - ptl_prof_request_fini_fn, - ptl_prof_match_fn, - ptl_prof_send_progress_fn, - ptl_prof_recv_progress_fn, - NULL, /* the stack :) */ - NULL, - } -}; - -static void ptl_prof_construct(mca_ptl_prof_t* ptl) -{ - ptl->super.ptl_add_procs = ptl_prof_add_procs_fn; - ptl->super.ptl_del_procs = ptl_prof_del_procs_fn; - ptl->super.ptl_finalize = ptl_prof_finalize_fn; - ptl->super.ptl_send = ptl_prof_send_fn; - ptl->super.ptl_put = ptl_prof_put_fn; - ptl->super.ptl_get = ptl_prof_get_fn; - ptl->super.ptl_matched = ptl_prof_matched_fn; - ptl->super.ptl_request_init = ptl_prof_request_init_fn; - ptl->super.ptl_request_fini = ptl_prof_request_fini_fn; - ptl->super.ptl_match = ptl_prof_match_fn; - ptl->super.ptl_send_progress = ptl_prof_send_progress_fn; - ptl->super.ptl_recv_progress = ptl_prof_recv_progress_fn; - ptl->super.ptl_stack = NULL; -} - -static void ptl_prof_destruct(mca_ptl_prof_t* ptl) -{ - /* deregistering the profiling ids from the profiling layer */ -} - -OBJ_CLASS_INSTANCE( mca_ptl_prof_t, opal_object_t, - ptl_prof_construct, - ptl_prof_destruct ); - diff --git a/ompi/mca/ptl/prof/ptl_prof.h b/ompi/mca/ptl/prof/ptl_prof.h deleted file mode 100644 index c1bc068a62..0000000000 --- a/ompi/mca/ptl/prof/ptl_prof.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * */ -/** - * * @file - * */ - -#ifndef PTL_PROF_H_HAS_BEEN_INCLUDED -#define PTL_PROF_H_HAS_BEEN_INCLUDED - -#include "opal/event/event.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -typedef struct mca_ptl_prof mca_ptl_prof_t; -typedef struct mca_ptl_prof_module_1_0_0 mca_ptl_prof_module_1_0_0_t; - -/** - * PROF PTL module. - */ -struct mca_ptl_prof_module_1_0_0 { - mca_ptl_base_component_t super; /**< base PTL module */ - mca_ptl_prof_t** prof_ptls; /**< array of available PTLs */ - uint32_t prof_num_ptls; /**< number of ptls actually used */ - uint32_t prof_max_ptls; /**< maximum number of ptls - available kernel ifs */ - uint32_t prof_buf_size; /**< the size of the internal buffer used to profile each PTL */ -}; -/** - * Profiling module. 
- */ -struct mca_ptl_prof { - mca_ptl_base_module_t super; - uint32_t prof_create_id; - uint32_t prof_start_send_id; - uint32_t prof_start_recv_id; - uint32_t prof_complete_id; -}; -OBJ_CLASS_DECLARATION(mca_ptl_prof_t); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* PTL_PROF_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/ptl/prof/ptl_prof_component.c b/ompi/mca/ptl/prof/ptl_prof_component.c deleted file mode 100644 index c7e0921b95..0000000000 --- a/ompi/mca/ptl/prof/ptl_prof_component.c +++ /dev/null @@ -1,130 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include -#include -#include -#include -#include -#include -#include -#include - -#include "constants.h" -#include "opal/event/event.h" -#include "opal/util/if.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "opal/mca/base/mca_base_param.h" -#include "ompi/mca/pml/base/pml_base_module_exchange.h" -#include "ptl_prof.h" - -static int mca_ptl_prof_component_open_fn( void ); -static int mca_ptl_prof_component_close_fn( void ); -static struct mca_ptl_base_module_t** ptl_prof_component_init_fn( - int *num_ptls, - bool enable_progress_threads, bool enable_mpi_threads); -static int ptl_prof_component_control_fn( int param, void* value, size_t size ); - -mca_ptl_prof_module_1_0_0_t mca_ptl_prof_component = { - { - /* First, the mca_base_module_t struct containing meta information - about the module itself */ - - { - /* Indicate that we are a pml v1.0.0 module (which also implies a - specific MCA version) */ - - MCA_PTL_BASE_VERSION_1_0_0, - - "prof", /* MCA module name */ - OMPI_MAJOR_VERSION, /* MCA module major version */ - OMPI_MINOR_VERSION, /* MCA module minor version */ - OMPI_RELEASE_VERSION, /* MCA module release version */ - mca_ptl_prof_component_open_fn, /* module open */ - mca_ptl_prof_component_close_fn /* module close */ - }, - - /* Next the MCA v1.0.0 module meta data */ - - { - /* Whether the module is checkpointable or not */ - true - }, - - ptl_prof_component_init_fn, - ptl_prof_component_control_fn, - NULL, - } -}; - -/** - * This is the moment to grab all existing modules, and then replace their - * functions with my own. In same time the ptl_stack will be initialized - * with the pointer to a ptl automatically generate, which will contain - * the correct pointers. 
- */ -static int ptl_prof_component_control_fn( int param, void* value, size_t size ) -{ - /* check in mca_ptl_base_modules_initialized */ - return 0; -} - -/* We have to create at least one PTL, just to allow the PML to call the control - * function associated with this PTL. - */ -extern mca_ptl_prof_t mca_ptl_prof; -static struct mca_ptl_base_module_t** ptl_prof_component_init_fn( - int *num_ptls, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - mca_ptl_prof_t** ptl_array; - - *num_ptls = 1; - ptl_array = (mca_ptl_prof_t**)malloc( (*num_ptls) * sizeof(mca_ptl_prof_t*) ); - ptl_array[0] = &mca_ptl_prof; - mca_ptl_prof.super.ptl_component = (mca_ptl_base_component_t*)&mca_ptl_prof_component; - mca_ptl_prof_component.prof_ptls = ptl_array; - return (struct mca_ptl_base_module_t**)ptl_array; -} - -static int mca_ptl_prof_component_open_fn( void ) -{ - return OMPI_SUCCESS; -} - -static int mca_ptl_prof_component_close_fn( void ) -{ -#if 0 - /* JMS This should only occur if this component was selected -- if - it wasn't selected, it appears that the PTL base takes care of - freeing this (ptl_base_select.c line 124) */ - if( NULL != mca_ptl_prof_component.prof_ptls ) { - free( mca_ptl_prof_component.prof_ptls ); - mca_ptl_prof_component.prof_ptls = NULL; - } -#endif - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/ptl/ptl.h b/ompi/mca/ptl/ptl.h deleted file mode 100644 index 100fb5c072..0000000000 --- a/ompi/mca/ptl/ptl.h +++ /dev/null @@ -1,716 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. 
- * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * P2P Transport Layer (PTL) - * - * An MCA component type that allows the PML (mca_pml_t) to support a - * variety of network transports concurrently. The PTL layer is - * responsible for the reliable delivery of message fragments, while - * the assignment and scheduling of fragments to PTLs is handled by - * the upper layer. - * - * PTL Initialization: - * - * During library initialization, all available PTL components are - * loaded and opened via their mca_base_open_component_fn_t - * function. The PTL open function should register any mca parameters - * used to tune/adjust the behaviour of the ptl (mca_base_param_register_int(), - * mca_base_param_register_string()). Note that the open function may fail - * if the resources (e.g. shared libraries, etc) required by the network - * transport are not available. - * - * The mca_ptl_base_component_init_fn_t() is then called for each of the - * components that are succesfully opened. The component init function may - * return either: - * - * (1) a NULL list of PTL instances if the transport is not available, - * (2) a list containing a single PTL instance, where the PTL provides - * a layer of abstraction over multiple physical devices (e.g. NICs), - * (3) a list containing multiple PTL instances where each PTL instance - * corresponds to a single physical device. - * - * If multiple network devices are available for a given transport, - * the preferred approach is (3) above. In this case, the PML layer - * will handle scheduling across the available resources, and - * fail-over in the event of a PTL failure. If the second approach is - * used, and a single PTL instance abstracts multiple physical - * devices, the PTL assumes all responsibility for scheduling/failover - * within those devices. 
- * - * During module initialization, the module should post any addressing - * information required by its peers. An example would be the TCP - * listen port opened by the TCP module for incoming connection - * requests. This information is published to peers via the - * mca_pml_base_modex_send() interface. Note that peer information is not - * guaranteed to be available via mca_pml_base_modex_recv() during the - * module's init function. However, it will be available during - * PTL selection (mca_ptl_base_add_proc_fn_t()). - * - * PTL Selection: - * - * The PML builds an ordered list of the available PTL instances sorted - * by their exclusivity ranking. This is a relative ranking that is used - * to determine the set of PTLs that may be used to reach a given destination. - * During startup the PTL modules are queried via their - * mca_ptl_base_add_proc_fn_t() to determine if they are able to reach - * a given destination. The PTL module with the highest ranking that - * returns success is selected. Subsequent PTL modules are selected only - * if they have the same exclusivity ranking. - * - * An example of how this might be used: - * - * PTL Exclusivity Comments - * -------- ----------- ------------------ - * LO 100 Selected exclusively for local process - * SM 50 Selected exclusively for other processes on host - * IB 0 Selected based on network reachability - * IB 0 Selected based on network reachability - * TCP 0 Selected based on network reachability - * TCP 0 Selected based on network reachability - * - * When a PTL module is selected, it may choose to optionally return a - * pointer to an an mca_ptl_base_peer_t data structure to the PML. - * This pointer is treated as an opaque handle by the PML and is - * returned to the PTL on subsequent data transfer calls to the - * corresponding destination process. 
The actual contents of the - * data structure are defined on a per PTL basis, and may be used to - * cache addressing or connection information, such as a TCP socket - * or IB queue pair. - * - * Send Path: - * - * When multiple PTLs are available to reach a given destination, - * a single request (that is large enough) will be split across the - * available PTLs. For each destination process, the PML maintains two - * list of PTLs, one set of PTLs that exhibit the lowest latency, and - * a second set that are used for bulk data transfer. The set of low - * latency PTLs are used in a round-robin fashion to schedule the first - * fragment of a message, while the remainder of the message will be - * scheduled across the second set based on the bandwidth of the available - * PTLs. - * - * The PML is responsible for managing the state (allocation, initialization, - * and release) of send request descriptors (mca_ptl_base_send_request_t). - * However, to minimize the latency associated with allocating resources to - * a request, the PML provides the capability to cache send requests - * descriptors on a per-PTL basis. Each PTL exports two variables - * (ptl_cache_size and ptl_cache_bytes) that control this behaviour. The - * variable ptl_cache_size specifies the maximum size of the cache. If a - * request cannot be provided from the cache, a request descriptor from the - * global pool will be used instead, and the req_cached attribute of the - * request set to false. The request cache initially starts off empty and - * is grown by the PML up to the specified limit. The PTL variable, - * ptl_cache_bytes, can be used to specify that additional memory should be - * allocated by the PML in one contigous block along with the base send request - * (mca_ptl_base_send_request_t) for use by the PTL. The PTLs ptl_request_init() - * method is then called to initialize this additional memory and associating - * any PTL specific resources with the request. 
If a request is removed from - * the cache, the ptl_request_fini() method will be called to allow the PTL - * to release any resources associated with the request descriptor. - * - * When the request is started, the PML will call the selected PTL's - * ptl_send() method with up to the PTL's threshold (ptl_first_frag_size) - * bytes of the request. The PTL should attempt to deliver the requested - * number of bytes. However, this may not be possible due to resource - * contraints or datatype alignment/offset. The PTL is responsible for - * updating the number of bytes actually fragmented and queued for delivery - * on the send request (mca_ptl_base_send_request.req_offset) to reflect - * the current offset into the send buffer. - * - * If the request is larger than ptl_first_frag_size, the remainder of - * the request will be scheduled upon an acknowledgment from the peer - * that the request has been matched on the receive side. The PTL receiving - * the fragment is responsible for generating an acknowledgment when the - * MCA_PTL_FLAGS_ACK bit is set in the flags field of the fragment - * header. The PTL receiving an ack is responsible for updating the - * the send request descriptor to point to the matched recv descriptor - * and the destination buffer address at the remote process. The address of - * the recv descriptor is sent back in the header of subsequent fragments - * to avoid the cost of matching the additional fragments at the receiver - * while the remote address of the destination buffer may be used in - * subsequent data transfer operations to support RDMA put operations. - * - * On receipt of an acknowedgment the PTL should call the ptl_send_progress() - * function to update the status (number of bytes delivered) of the send request. - * Note that although this function is associated with the PTL, it is provided/ - * set by the PML during initialization. 
- * - * If this was the initial fragment of a large message, the PML will schedule - * the remaining fragments during this callback. For subsequent fragments - * the PML will call the PTLs ptl_put() interface function. Since the destination - * address in the remote process is available, RDMA put operations could be used - * if supported by the underlying network transport. Note that currently the PML - * makes no other distinction between ptl_send/ptl_put. - * - * As subsequent fragments are completed by the PTLs, the ptl_send_progress() - * function should be called to update the status of request. Note that this - * may be based on local completion semantics or could require a PTL specific - * acknowledgment based on the underlying transfer protocol. Upon completion, - * the PTL is responsible for managing all resources associated with send - * fragments and their return to internal caches/free lists. - * - * Recv Path: - * - * The PML sets two additional callback functions on the PTL during - * initialization. These callbacks are used by the PTL to notify the - * PML of receipt of the initial fragment of a new message (ptl_match) - * and to update the status of a pending receive as fragment(s) complete - * (ptl_recv_progress). - * - * The first fragment of a message is sent with a header type of - * mca_ptl_base_match_header_t. When a header of this type is received, - * the PTL should call the ptl_match() function as soon as the entire header - * is available, to determine if a matching receive has been posted. When a - * matching receive is posted the PTLs ptl_matched() function is called to - * process the fragment and if required generate an acknowledgment. Note that - * this call (ptl_matched()) may occur during the call to ptl_match() or at - * a later point in time if a matching recv has not yet been posted or MPI - * ordering constraints are not satisfied. 
- * - * Prior to calling ptl_matched(), the PML updates the recv fragment descriptor - * (mca_ptl_recv_frag_t) to point to the matching recv request. If the data - * associated with the fragment has been received prior to the ptl_matched() - * call, the PTL should utilize the datatype convertor associated with the - * recv fragment to copy the data into the users buffer. Note that the datatype - * convertor provides the capability to unpack the fragment at an arbitrary - * (e.g. fragment based) offset into the destination buffer. On completion of - * the data copy, the PTL should call the ptl_recv_progress() function, to update - * the request completion status. - * - * If the initial fragment is matched prior to receiving any data associated - * with the fragment, or in the case of subsequent fragments, the datatype - * convertor may be used to generate an iovec array of contiguous blocks - * pointing into the destination buffer, which can be used for zero-copy - * receives if the underlying transport supports scatter/gather operations. - * - * The ptl_matched() function should additionally generate, if required, an - * ack to the source process. An ack is required if the MCA_PTL_FLAGS_ACK - * bit is set by the source in the flags field of the initial message header. - * As described above, the generated ack should contain a pointer to the matched - * receive request, along with the pointer to the destination buffer. - * - * On receipt of the ack, the source will schedule any remaining fragments. - * The selected PTLs should generate the remaining fragments with an - * mca_ptl_base_frag_header_t, which contains a placeholder for a pointer - * to the matched receive request. This allows the receiver to avoid calling the - * matching logic for subsequent fragments. On completion of these fragments, - * the PTL should call the ptl_recv_progress() function to update the - * request completion status. 
As fragments are completed, the PTL is responsible - * for freeing any resources associated with recv fragment descriptors and/or - * returning them to internal free lists/caches. - * - * Progress: - * - * By default, the library provides for polling based progress of outstanding - * requests. The PTL component exports an interface function (ptlm_progress) - * that is called in a polling mode by the PML during calls into the MPI - * library. Note that the ptlm_progress() function is called on the PTL component - * rather than each PTL instance. This implies that the PTL author is responsible - * for iterating over the pending operations in each of the PTL modules associated - * with the component. - * - * On platforms where threading support is provided, the library provides the - * option of building with asynchronous threaded progress. In this case, the PTL - * author is responsible for providing a thread to progress pending operations. - * A thread is associated with the PTL component/module such that transport specific - * functionality/APIs may be used to block the thread until a pending operation - * completes. This thread MUST NOT poll for completion as this would oversubscribe - * the CPU. - * - * Note that in the threaded case the PML may choose to use a hybrid approach, - * such that polling is implemented from the user thread for a fixed number of - * cycles before relying on the background thread(s) to complete requests. If - * possible the PTL should support the use of both modes concurrently. - * - */ - -/* Thses are unprotected because if the pml is direct called, pml.h - has a dependencies on ptl.h and must have ptl.h fully included - before pml.h is parsed. It's weird, but there isn't a better way - without doing some strange type forward declarations. 
*/ -#include "opal/mca/mca.h" -#include "ompi/mca/pml/pml.h" - -#ifndef MCA_PTL_H -#define MCA_PTL_H - -#include "ompi/types.h" - -/* - * PTL types - */ - -struct mca_ptl_base_module_t; -struct mca_ptl_base_peer_t; -struct mca_ptl_base_fragment_t; -struct mca_ptl_base_recv_request_t; -struct mca_ptl_base_send_request_t; -struct mca_ptl_base_recv_frag_t; -struct mca_ptl_base_send_frag_t; -struct mca_ptl_base_match_header_t; - -typedef uint64_t mca_ptl_sequence_t; -typedef uint64_t mca_ptl_tstamp_t; -typedef struct opal_list_t mca_ptl_queue_t; - -typedef enum { - MCA_PTL_ENABLE -} mca_ptl_control_t; - -/** - * PTL flags - */ -#define MCA_PTL_PUT 1 -#define MCA_PTL_GET 2 - -/* - * PTL component interface functions and datatype. - */ - -/** - * MCA->PTL Intializes the PTL component and creates specific PTL - * module(s). - * - * @param num_ptls (OUT) Returns the number of ptl instances created, or 0 - * if the transport is not available. - * - * @param enable_progress_threads (IN) Whether this component is - * allowed to run a hidden/progress thread or not. - * - * @param enable_mpi_threads (IN) Whether support for multiple MPI - * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which - * indicates whether multiple threads may invoke this component - * simultaneously or not. - * - * @return Array of pointers to PTL modules, or NULL if the transport - * is not available. - * - * During component initialization, the PTL component should discover - * the physical devices that are available for the given transport, - * and create a PTL instance to represent each device. Any addressing - * information required by peers to reach the device should be published - * during this function via the mca_pml_base_modex_send() interface. - * - */ -typedef struct mca_ptl_base_module_t** (*mca_ptl_base_component_init_fn_t)( - int *num_ptls, - bool enable_progress_threads, - bool enable_mpi_threads -); - - -/** - * MCA->PTL Called to dynamically change a component parameter. 
- * - * @param flag (IN) Parameter to change. - * @param value (IN) Optional parameter value. - * - * @return OMPI_SUCCESS or error code on failure. - * - * The only supported parameter is currently MCA_PTL_ENABLE, - * which can be used by the PML to enable/disable forwarding - * by the PTL. - */ -typedef int (*mca_ptl_base_component_control_fn_t)( - int param, - void* value, - size_t size -); - - -/** - * MCA->PTL Called to progress outstanding requests for - * non-threaded polling environments. - * - * @param tstamp Current time. - * @return OMPI_SUCCESS or error code on failure. - */ -typedef int (*mca_ptl_base_component_progress_fn_t)( - mca_ptl_tstamp_t tstamp -); - - -/** - * PTL component descriptor. Contains component version information - * and component open/close/init functions. - */ - -struct mca_ptl_base_component_1_0_0_t { - mca_base_component_t ptlm_version; - mca_base_component_data_1_0_0_t ptlm_data; - - mca_ptl_base_component_init_fn_t ptlm_init; - mca_ptl_base_component_control_fn_t ptlm_control; - mca_ptl_base_component_progress_fn_t ptlm_progress; -}; -typedef struct mca_ptl_base_component_1_0_0_t mca_ptl_base_component_1_0_0_t; -typedef struct mca_ptl_base_component_1_0_0_t mca_ptl_base_component_t; - - -/* - * PTL instance interface functions and datatype. - */ - - -/** - * MCA->PTL Clean up any resources held by PTL instance before the - * module is unloaded. - * - * @param ptl (IN) PTL instance. - * - * Prior to unloading a PTL module, the MCA framework will call - * the PTL finalize method of the module. Any resources held by - * the PTL should be released and if required the memory corresponding - * to the PTL module freed. - * - */ -typedef int (*mca_ptl_base_module_finalize_fn_t)( - struct mca_ptl_base_module_t* ptl -); - -/** - * PML->PTL notification of change in the process list. 
- * - * @param ptl (IN) PTL instance - * @param nprocs (IN) Number of processes - * @param procs (IN) Set of processes - * @param peer (OUT) Set of (optional) mca_ptl_base_peer_t instances returned by PTL. - * @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this PTL. - * @return OMPI_SUCCESS or error status on failure. - * - * The mca_ptl_base_module_add_procs_fn_t() is called by the PML to - * determine the set of PTLs that should be used to reach each process. - * Any addressing information exported by the peer via the mca_pml_base_modex_send() - * function should be available during this call via the corresponding - * mca_pml_base_modex_recv() function. The PTL may utilize this information to - * determine reachability of each peer process. - * - * For each process that is reachable by the PTL, the bit corresponding to the index - * into the proc array (nprocs) should be set in the reachable bitmask. The PML - * provides the PTL the option to return a pointer to a data structure defined - * by the PTL that is returned to the PTL on subsequent calls to the PTL data - * transfer functions (e.g ptl_send). This may be used by the PTL to cache any addressing - * or connection information (e.g. TCP socket, IP queue pair). - */ -typedef int (*mca_ptl_base_module_add_procs_fn_t)( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_ptl_base_peer_t** peer, - struct ompi_bitmap_t* reachable -); - -/** - * PML->PTL notification of change to the process list. - * - * @param ptl (IN) PTL instance - * @param nprocs (IN) Number of processes - * @param proc (IN) Set of processes - * @param peer (IN) Set of peer addressing information. - * @return Status indicating if cleanup was successful - * - * When the process list changes, the PML notifies the PTL of the - * change, to provide the opportunity to cleanup or release any - * resources associated with the peer. 
- */ -typedef int (*mca_ptl_base_module_del_procs_fn_t)( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t** procs, - struct mca_ptl_base_peer_t** -); - -/** - * PML->PTL Initialize a send request for use by the PTL. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - * To reduce latency (number of required allocations), the PML allocates up - * to ptl_cache_bytes of additional space contigous w/ the base send request. - * This space may be used by the PTL for additional control information (e.g. - * first fragment descriptor). - * - * The ptl_request_init() function is called by the PML when requests are - * allocated to the PTLs cache. These requests will be cached by the PML - * on completion and re-used by the same PTL w/out additional calls to - * ptl_request_init(). - * - * If the cache size is exceeded, the PML may pass requests to ptl_send/ptl_put - * that have been taken from the global pool and have not been initialized by the - * PTL. These requests will have the req_cached attribute set to false. - * - */ -typedef int (*mca_ptl_base_module_request_init_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* request -); - - -/** - * PML->PTL Cleanup any resources that may have been associated with the - * request by the PTL. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - * The ptl_request_fini function is called when the PML removes a request - * from the PTLs cache (due to resource constraints). This routine provides - * the PTL the chance to cleanup/release any resources cached on the send - * descriptor by the PTL. - */ - -typedef void (*mca_ptl_base_module_request_fini_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* request -); - -/** - * PML->PTL Initiate a send to the peer. 
- * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param request (IN) Send request - * @param offset Current offset into packed/contiguous buffer. - * @param size (IN) Number of bytes PML is requesting PTL to deliver, - * @param flags (IN) Flags that should be passed to the peer via the message header. - * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - * - * The PML implements a rendevouz protocol, with up to the PTL threshold - * (ptl_first_frag_size) bytes of the message sent in eager send mode. The ptl_send() - * function is called by the PML to initiate the send of the first message fragment. - * - * The PTL is responsible for updating the current data offset (req_offset) in the - * request to reflect the actual number of bytes fragmented. This may be less than - * the requested size, due to resource constraints or datatype alighnment/offset. If - * an acknowledgment is required, the MCA_PTL_FLAGS_ACK bit will be set in the - * flags parameter. In this case, the PTL should not call ptl_send_progress() function - * to indicate completion of the fragment until the ack is received. For all other - * fragments ptl_send_progress() may be called based on local completion semantics. - */ -typedef int (*mca_ptl_base_module_send_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_ptl_base_send_request_t* request, - size_t offset, - size_t size, - int flags -); - -/** - * PML->PTL Initiate a put to the peer. - * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param request (IN) Send request - * @param offset Current offset into packed/contiguous buffer. - * @param size (IN/OUT) Number of bytes PML is requesting PTL to deliver, - * PTL returns number of bytes sucessfully fragmented - * @param flags (IN) Flags that should be passed to the peer via the message header. 
- * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - * - * When the message exceeds the PTLs initial fragment size (ptl_first_frag_size), - * the PML schedules the remainder of the message after an ack is received for - * the first fragment. When the remaining fragments are scheduled the PML calls the - * the ptl_put() I/F function rather than ptl_send(), to indicate that the address of - * the destination buffer at the remote process is available, allowing for an RDMA put - * if supported by the underlying transport. - * - * The PTL is responsible for updating the current data offset (req_offset) in the - * request to reflect the actual number of bytes fragmented. This may be less than - * the requested size, due to resource constraints or datatype alighnment/offset. - * The PTL must call the ptl_send_progress() function to indicate completion of each - * fragment. - */ - -typedef int (*mca_ptl_base_module_put_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_ptl_base_send_request_t* request, - size_t offset, - size_t size, - int flags -); - - -/** - * PML->PTL Initiate a get from a peer. (NOT IMPLEMENTED) - * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param request (IN) Recv request - * @param offset Current offset into packed/contiguous buffer. - * @param size (IN/OUT) Number of bytes PML is requesting PTL to pull from peer, - * PTL returns number of bytes sucessfully fragmented. - * @param flags (IN) - * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - * - * The PML does NOT currently utilize this I/F. 
- */ - -typedef int (*mca_ptl_base_module_get_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_ptl_base_recv_request_t* request, - size_t offset, - size_t size, - int flags -); - -/** - * PTL->PML Notification from the PTL to the PML that a new fragment - * has arrived and can be matched against posted receives. - * - * @param ptl (IN) PTL instance - * @param recv_frag Receive fragment - * @param header (IN) Message header - * - * The ptl_match() function is called by the PTL on receipt of an - * initial fragment of a new message. The PML sets a default - * matching function on the PTL (ptl_match) when the PTL is initialized. - * This function attempts to match the header corresponding to the - * receive fragment to posted receives. When a match is made, the - * PTLs ptl_matched() function is called. Note that this may occur - * during the call to ptl_match(), or later in time if a matching receive - * has not yet been posted or the receive fragment is out-of-order. - */ -typedef bool (*mca_ptl_base_module_match_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* recv_frag, - struct mca_ptl_base_match_header_t* header -); - - -/** - * PML->PTL Notification from the PML to the PTL that a receive - * has been posted and matched against the indicated fragment. - * - * @param ptl (IN) PTL instance - * @param recv_frag Matched fragment - * - * The ptl_matched() function is called by the PML when a fragment - * is matched to a posted receive. This may occur during a call to - * ptl_match() if the receive is matched, or at a later point in time - * when a matching receive is posted. - * - * When this routine is called, the PTL is responsible for generating - * an acknowledgment to the peer if the MCA_PTL_FLAGS_ACK - * bit is set in the original fragment header. 
Additionally, the PTL - * is responsible for transferring any data associated with the fragment - * into the users buffer utilizing the datatype engine, and notifying - * the PML that the fragment has completed via the ptl_recv_progress() - * function. - */ - -typedef void (*mca_ptl_base_module_matched_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* request -); - -/** - * PTL->PML Notification from the PTL to the PML that a fragment - * has completed (e.g. been successfully delivered into users buffer) - * - * @param ptr(IN) PTL instance - * @param recv_request (IN) Receive Request - * @param bytes_received (IN) Number of bytes received from peer. - * @param bytes_delivered (IN) Number of bytes delivered to application. - * - * The PML sets this function pointer during module initialization - * to allow the PTL to make upcalls back into the PML as fragments - * complete. - */ -typedef void (*mca_ptl_base_module_recv_progress_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_request_t* recv_request, - size_t bytes_received, - size_t bytes_delivered -); - -/** - * PTL->PML Notification from the PTL to the PML that a fragment - * has completed (e.g. been successfully delivered to peer) - * - * @param ptr(IN) PTL instance - * @param send_request (IN) Send Request - * @param bytes_sent (IN) Number of bytes sent to peer. - * - * The PML sets this function pointer during module initialization - * to allow the PTL to make upcalls back into the PML as fragments - * complete. - */ - -typedef void (*mca_ptl_base_module_send_progress_fn_t)( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* send_request, - size_t bytes_sent -); - -/** - * PTL instance interface functions and attributes. 
- */ -struct mca_ptl_base_module_t { - - /* PTL common attributes */ - mca_ptl_base_component_t* ptl_component; /**< pointer back to the PTL component structure */ - size_t ptl_cache_size; /**< maximum size of request cache for this PTL */ - size_t ptl_cache_bytes; /**< number of bytes required by PTL for request cache */ - size_t ptl_first_frag_size; /**< maximum size of first fragment -- eager send */ - size_t ptl_min_frag_size; /**< threshold below which the PTL will not fragment */ - size_t ptl_max_frag_size; /**< maximum fragment size supported by the PTL */ - uint32_t ptl_exclusivity; /**< indicates this PTL should be used exclusively */ - uint32_t ptl_latency; /**< relative ranking of latency used to prioritize ptls */ - uint32_t ptl_bandwidth; /**< bandwidth (Mbytes/sec) supported by each endpoint */ - uint32_t ptl_flags; /**< flags (put/get...) */ - - /* PML->PTL function table */ - mca_ptl_base_module_add_procs_fn_t ptl_add_procs; - mca_ptl_base_module_del_procs_fn_t ptl_del_procs; - mca_ptl_base_module_finalize_fn_t ptl_finalize; - mca_ptl_base_module_send_fn_t ptl_send; - mca_ptl_base_module_put_fn_t ptl_put; - mca_ptl_base_module_get_fn_t ptl_get; - mca_ptl_base_module_matched_fn_t ptl_matched; - mca_ptl_base_module_request_init_fn_t ptl_request_init; - mca_ptl_base_module_request_fini_fn_t ptl_request_fini; - - /* PTL->PML function table - filled in by PML during module init */ - mca_ptl_base_module_match_fn_t ptl_match; - mca_ptl_base_module_send_progress_fn_t ptl_send_progress; - mca_ptl_base_module_recv_progress_fn_t ptl_recv_progress; - - /* Allow the canibalization of the PTL */ - struct mca_ptl_base_module_t* ptl_stack; - - /* for use by PML only */ - struct mca_pml_base_ptl_t* ptl_base; -}; -typedef struct mca_ptl_base_module_t mca_ptl_base_module_t; - -/* - * Macro for use in modules that are of type ptl v1.0.0 - */ -#define MCA_PTL_BASE_VERSION_1_0_0 \ - /* coll v1.0 is chained to MCA v1.0 */ \ - MCA_BASE_VERSION_1_0_0, \ - /* ptl v1.0 */ \ - 
"ptl", 1, 0, 0 - -#endif /* OMPI_MCA_PTL_H */ diff --git a/ompi/mca/ptl/self/Makefile.am b/ompi/mca/ptl/self/Makefile.am deleted file mode 100644 index d1c6116730..0000000000 --- a/ompi/mca/ptl/self/Makefile.am +++ /dev/null @@ -1,49 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - - - -# Make the output library in this directory, and name it either -# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la -# (for static builds). - -if OMPI_BUILD_ptl_self_DSO -component_noinst = -component_install = mca_ptl_self.la -else -component_noinst = libmca_ptl_self.la -component_install = -endif - -self_SOURCES = ptl_self.c ptl_self.h ptl_self_component.c - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) -mca_ptl_self_la_SOURCES = $(self_SOURCES) -mca_ptl_self_la_LIBADD = \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la -mca_ptl_self_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_ptl_self_la_SOURCES = $(self_SOURCES) -libmca_ptl_self_la_LIBADD = -libmca_ptl_self_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/ptl/self/configure.params b/ompi/mca/ptl/self/configure.params deleted file mode 100644 index 18a97f406a..0000000000 --- a/ompi/mca/ptl/self/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of 
Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=ptl_self.c -PARAM_CONFIG_HEADER_FILE="self_config.h" -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/ptl/self/ptl_self.c b/ompi/mca/ptl/self/ptl_self.c deleted file mode 100644 index 74612c6644..0000000000 --- a/ompi/mca/ptl/self/ptl_self.c +++ /dev/null @@ -1,218 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -#include "ompi/constants.h" -#include "ompi/class/ompi_bitmap.h" -#include "opal/event/event.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/mca/base/mca_base_param.h" -#include "ompi/datatype/datatype.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ptl_self.h" -#include "ompi/communicator/communicator.h" - -mca_ptl_base_module_t mca_ptl_self_module = { - &mca_ptl_self_component.super, - 8, /* ptl_cache_size */ - sizeof(mca_ptl_base_recv_frag_t), /* ptl_cache_bytes */ - 0, /* ptl_frag_first_size */ - 0, /* ptl_frag_min_size */ - 0, /* ptl_frag_max_size */ - 65535, /* ptl_exclusivity */ - 0, /* ptl_latency */ - 0, /* ptl_bandwidth */ - MCA_PTL_PUT, /* ptl flags */ - mca_ptl_self_add_proc, - mca_ptl_self_del_proc, - mca_ptl_self_finalize, - mca_ptl_self_send, /* put */ - mca_ptl_self_send, /* put */ - NULL, /* get */ - mca_ptl_self_matched, /* matched */ - mca_ptl_self_request_init, - mca_ptl_self_request_fini, - NULL, /* match */ - NULL, - NULL -}; - -extern mca_ptl_self_component_t mca_ptl_self_component; - - -int mca_ptl_self_add_proc(struct mca_ptl_base_module_t* ptl, size_t nprocs, struct ompi_proc_t **ompi_proc, struct mca_ptl_base_peer_t** peer_ret, ompi_bitmap_t* reachable) -{ - size_t i, count; - - for( i = 0, count = 0; i < nprocs; i++ ) { - if( ompi_proc[i] == mca_ptl_self_component.self_local ) { - ompi_bitmap_set_bit( reachable, i ); - count++; - } - } - return OMPI_SUCCESS; -} - -int mca_ptl_self_del_proc(struct mca_ptl_base_module_t* ptl, size_t nprocs, struct ompi_proc_t **proc, struct mca_ptl_base_peer_t** ptl_peer) -{ - return 
OMPI_SUCCESS; -} - -/* before the module is unloaded (called once)*/ -int mca_ptl_self_finalize(struct mca_ptl_base_module_t* ptl) -{ - return OMPI_SUCCESS; -} - -int mca_ptl_self_request_init(struct mca_ptl_base_module_t* ptl, mca_ptl_base_send_request_t* request) -{ - OBJ_CONSTRUCT(request+1, mca_ptl_base_recv_frag_t); - return OMPI_SUCCESS; -} - -void mca_ptl_self_request_fini(struct mca_ptl_base_module_t* ptl, mca_ptl_base_send_request_t* request) -{ - OBJ_DESTRUCT(request+1); -} - -/* - * Initiate a send. If this is the first fragment, use the fragment - * descriptor allocated with the send requests, otherwise obtain - * one from the free list. Initialize the fragment and foward - * on to the peer. - */ - -int mca_ptl_self_send( struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_base_peer, - struct mca_ptl_base_send_request_t* request, - size_t offset, - size_t size, - int flags ) -{ - mca_ptl_self_send_request_t* req = (mca_ptl_self_send_request_t*)request; - mca_ptl_base_header_t* hdr = &(req->req_frag.frag_base.frag_header); - bool match; - - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_match.hdr_contextid = request->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = request->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_dst = request->req_send.req_base.req_peer; - hdr->hdr_match.hdr_tag = request->req_send.req_base.req_tag; - hdr->hdr_match.hdr_msg_length = request->req_send.req_bytes_packed; - hdr->hdr_match.hdr_msg_seq = request->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_src_ptr.lval = 0; - hdr->hdr_rndv.hdr_src_ptr.pval = request; - req->req_frag.frag_base.frag_peer = ptl_base_peer; - req->req_frag.frag_base.frag_size = request->req_send.req_bytes_packed; - req->req_frag.frag_base.frag_owner = &mca_ptl_self_module; - req->req_frag.frag_request = NULL; - req->req_frag.frag_is_buffered = 0; - match = ptl->ptl_match( ptl, &(req->req_frag), 
&(hdr->hdr_match) ); -#if !OMPI_ENABLE_MPI_THREADS && 0 - /* If we are in a non threaded case and the send is blocking for MPI correctness - * the receive should be already posted. Otherwise the program will lead to a deadlock. - */ - if( (false == match) && (MCA_PML_REQUEST_SEND == req->req_send.req_base.req_type) ) { - opal_output( 0, "OMPI reach a dead-lock situation. A send to self was posted without a proper receive\n" ); - return OMPI_ERROR; - } -#endif /* OMPI_ENABLE_MPI_THREADS */ - return OMPI_SUCCESS; -} - - -/* - * A posted receive has been matched - if required send an - * ack back to the peer and process the fragment. - */ -void mca_ptl_self_matched( mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag) -{ - mca_ptl_self_send_request_t* sendreq = (mca_ptl_self_send_request_t*) - frag->frag_base.frag_header.hdr_rndv.hdr_src_ptr.pval; - mca_ptl_base_recv_request_t* recvreq = frag->frag_request; - - if( (recvreq->req_recv.req_base.req_count != 0) && - (sendreq->req_ptl.req_send.req_base.req_count != 0) ) { - /* Did we have the same datatype or not ? If yes we can use an optimized version - * for the copy function, if not we have to use a temporary buffer to pack/unpack - * - * Note that if this is a buffered send - the data has already been packed into - * a contigous buffer and the convertor on the send request initialized to point - * into this buffer. - */ - if( sendreq->req_ptl.req_send.req_datatype == recvreq->req_recv.req_base.req_datatype ) { - ompi_ddt_copy_content_same_ddt( recvreq->req_recv.req_base.req_datatype, - recvreq->req_recv.req_base.req_count > sendreq->req_ptl.req_send.req_count ? 
- sendreq->req_ptl.req_send.req_count : recvreq->req_recv.req_base.req_count, - (char *)recvreq->req_recv.req_base.req_addr, - (char *)sendreq->req_ptl.req_send.req_addr ); - } else { - ompi_convertor_t *send_convertor, *recv_convertor; - struct iovec iov[1]; - int32_t completed, freeAfter, length; - uint32_t iov_count; - size_t max_data; - char* buf; - - /* We use a temporary buffer as it look to be faster on much architectures */ - length = 64 * 1024; - buf = (char *)malloc( length * sizeof(char) ); - - recv_convertor = &(recvreq->req_recv.req_convertor); - send_convertor = &(sendreq->req_ptl.req_send.req_convertor); - - completed = 0; - freeAfter = 0; - while( !completed ) { - iov[0].iov_base = buf; - iov[0].iov_len = length; - iov_count = 1; - max_data = length; - completed |= ompi_convertor_pack( send_convertor, iov, &iov_count, - &max_data, &freeAfter ); - assert( freeAfter == 0 ); - completed |= ompi_convertor_unpack( recv_convertor, iov, &iov_count, - &max_data, &freeAfter ); - assert( freeAfter == 0 ); - } - free( buf ); - } - } - ptl->ptl_send_progress( ptl, &sendreq->req_ptl, - sendreq->req_ptl.req_send.req_bytes_packed ); - ptl->ptl_recv_progress( ptl, recvreq, - frag->frag_base.frag_header.hdr_match.hdr_msg_length, - frag->frag_base.frag_size ); -} - diff --git a/ompi/mca/ptl/self/ptl_self.h b/ompi/mca/ptl/self/ptl_self.h deleted file mode 100644 index 0da6bd27db..0000000000 --- a/ompi/mca/ptl/self/ptl_self.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - *Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - *Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - *Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - *Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- *$COPYRIGHT$ - * - *Additional copyrights may follow - * - *$HEADER$ - */ -/** - * @file - */ - -#ifndef PTL_SELF_H_HAS_BEEN_INCLUDED -#define PTL_SELF_H_HAS_BEEN_INCLUDED - -#include "ompi/class/ompi_free_list.h" -#include "opal/event/event.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * SELF PTL component. - */ -struct mca_ptl_self_component_t { - mca_ptl_base_component_1_0_0_t super; /**< base PTL component */ - uint32_t self_buf_size; /**< the size of the internal buffer used to pack/unpack the data */ - uint32_t self_is_non_blocking; /**< how the memcopy operations are done segmented or not */ - int32_t self_free_list_num; /**< initial size of free lists */ - int32_t self_free_list_max; /**< maximum size of free lists */ - int32_t self_free_list_inc; /**< number of elements to alloc when growing free lists */ - ompi_free_list_t self_send_requests; /**< free list of self send requests -- sendreq + sendfrag */ - struct ompi_proc_t* self_local; /**< the self proc instance corresponding to the local process */ -}; -typedef struct mca_ptl_self_component_t mca_ptl_self_component_t; - -/** - * Self send request derived type. The send request contains both the - * base send request, and the base receive fragment which will be used to do the match. 
- */ -struct mca_ptl_self_send_request_t { - mca_ptl_base_send_request_t req_ptl; - mca_ptl_base_recv_frag_t req_frag; /* first fragment */ -}; -typedef struct mca_ptl_self_send_request_t mca_ptl_self_send_request_t; -OBJ_CLASS_DECLARATION(mca_ptl_self_send_request_t); - -extern mca_ptl_self_component_t mca_ptl_self_component; - -/** - * Register SELF component parameters with the MCA framework - */ -extern int mca_ptl_self_component_open(void); - -/** - * Any final cleanup before being unloaded. - */ -extern int mca_ptl_self_component_close(void); - -/** - * SELF component initialization. - * - * @param num_ptls (OUT) Number of PTLs returned in PTL array. - * @param allow_multi_user_threads (OUT) Flag indicating wether PTL supports user threads (TRUE) - * @param have_hidden_threads (OUT) Flag indicating wether PTL uses threads (TRUE) - * - * (1) prepare the local buffering and initialize the SELF - * engine. - */ -extern mca_ptl_base_module_t** mca_ptl_self_component_init( - int *num_ptls, - bool enable_progress_threads, - bool enable_mpi_threads -); - -int mca_ptl_self_add_proc(struct mca_ptl_base_module_t* ptl, size_t nprocs, struct ompi_proc_t **ompi_proc, struct mca_ptl_base_peer_t** peer_ret, struct ompi_bitmap_t* reachable); -int mca_ptl_self_del_proc(struct mca_ptl_base_module_t* ptl, size_t nprocs, struct ompi_proc_t **proc, struct mca_ptl_base_peer_t** ptl_peer); -int mca_ptl_self_finalize(struct mca_ptl_base_module_t* ptl); -int mca_ptl_self_request_init(struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_send_request_t* request); -void mca_ptl_self_request_fini(struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_send_request_t* request); -int mca_ptl_self_send( struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_peer_t* ptl_base_peer, struct mca_ptl_base_send_request_t* request, - size_t offset, size_t size, int flags ); -void mca_ptl_self_matched( mca_ptl_base_module_t* ptl, mca_ptl_base_recv_frag_t* frag ); - -#if defined(c_plusplus) || 
defined(__cplusplus) -} -#endif -#endif /* PTL_SELF_H_HAS_BEEN_INCLUDED */ - diff --git a/ompi/mca/ptl/self/ptl_self_component.c b/ompi/mca/ptl/self/ptl_self_component.c deleted file mode 100644 index 232be5b3a1..0000000000 --- a/ompi/mca/ptl/self/ptl_self_component.c +++ /dev/null @@ -1,166 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -#include "ompi/constants.h" -#include "opal/event/event.h" -#include "opal/util/if.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "opal/mca/base/mca_base_param.h" -#include "ptl_self.h" -#include "ompi/proc/proc.h" - -mca_ptl_self_component_t mca_ptl_self_component = { - { - /* First, the mca_base_component_t struct containing meta information - about the component itself */ - - { - /* Indicate that we are a pml v1.0.0 component (which also implies a - specific MCA version) */ - - MCA_PTL_BASE_VERSION_1_0_0, - - "self", /* MCA component name */ - OMPI_MAJOR_VERSION, /* MCA component major version */ - OMPI_MINOR_VERSION, /* MCA component minor version */ - OMPI_RELEASE_VERSION, /* MCA component release version */ - 
mca_ptl_self_component_open, /* component open */ - mca_ptl_self_component_close /* component close */ - }, - - /* Next the MCA v1.0.0 component meta data */ - - { - /* Whether the component is checkpointable or not */ - true - }, - - mca_ptl_self_component_init, - NULL, - NULL, - } -}; - -extern mca_ptl_base_module_t mca_ptl_self_module; - - -static void mca_ptl_self_send_request_construct(mca_ptl_self_send_request_t* request) -{ - OBJ_CONSTRUCT(&request->req_frag, mca_ptl_base_recv_frag_t); -} - -static void mca_ptl_self_send_request_destruct(mca_ptl_self_send_request_t* request) -{ - OBJ_DESTRUCT(&request->req_frag); -} - -OBJ_CLASS_INSTANCE( mca_ptl_self_send_request_t, - mca_pml_base_send_request_t, - mca_ptl_self_send_request_construct, - mca_ptl_self_send_request_destruct ); - -/* - * utility routines for parameter registration - */ - -static inline int mca_ptl_self_param_register_int( - const char* param_name, - int default_value ) -{ - int id = mca_base_param_register_int("ptl","self",param_name,NULL,default_value); - int param_value = default_value; - mca_base_param_lookup_int(id,&param_value); - return param_value; -} - -/* - * Called by MCA framework to open the component, registers - * component parameters. 
- */ - -int mca_ptl_self_component_open(void) -{ - /* register SELF component parameters */ - mca_ptl_self_component.self_buf_size = - mca_ptl_self_param_register_int("buffer_size", 64*1024); - mca_ptl_self_component.self_is_non_blocking = - mca_ptl_self_param_register_int("nonblocking", 1); - return OMPI_SUCCESS; -} - -int mca_ptl_self_component_close(void) -{ - if( NULL == mca_ptl_self_component.self_local ) - return OMPI_SUCCESS; - - OBJ_DESTRUCT( &(mca_ptl_self_component.self_send_requests) ); - - return OMPI_SUCCESS; -} - - -mca_ptl_base_module_t** mca_ptl_self_component_init(int *num_ptl_modules, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - mca_ptl_base_module_t** modules; - - *num_ptl_modules = 0; - - modules = (mca_ptl_base_module_t **)malloc(sizeof(mca_ptl_base_module_t*)); - if( NULL == modules ) - return NULL; - - modules[0] = &mca_ptl_self_module; - - mca_ptl_self_component.self_free_list_num = 4; - mca_ptl_self_component.self_free_list_max = -1; - mca_ptl_self_component.self_free_list_inc = 4; - - /* Initialize the local pointer to the processor */ - mca_ptl_self_component.self_local = ompi_proc_local(); - - *num_ptl_modules = 1; - - OBJ_CONSTRUCT(&mca_ptl_self_component.self_send_requests, ompi_free_list_t); - ompi_free_list_init(&mca_ptl_self_component.self_send_requests, - sizeof(mca_ptl_self_send_request_t), - OBJ_CLASS(mca_ptl_self_send_request_t), - mca_ptl_self_component.self_free_list_num, - mca_ptl_self_component.self_free_list_max, - mca_ptl_self_component.self_free_list_inc, - NULL); /* use default allocator */ - - return modules; -} diff --git a/ompi/mca/ptl/sm/Makefile.am b/ompi/mca/ptl/sm/Makefile.am deleted file mode 100644 index 19a1ab159f..0000000000 --- a/ompi/mca/ptl/sm/Makefile.am +++ /dev/null @@ -1,83 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. 
-# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if OMPI_BUILD_ptl_sm_DSO -component_noinst = -component_install = mca_ptl_sm.la -else -component_noinst = libmca_ptl_sm.la -component_install = -endif - -EXTRA_DIST = -sm_SOURCES = \ - ptl_sm.c \ - ptl_sm.h \ - ptl_sm_address.h \ - ptl_sm_component.c \ - ptl_sm_frag.h \ - ptl_sm_frag.c \ - ptl_sm_peer.h \ - ptl_sm_recvfrag.h \ - ptl_sm_recvfrag.c \ - ptl_sm_send.c \ - ptl_sm_sendreq.c \ - ptl_sm_sendreq.h \ - ptl_sm_sendfrag.c \ - ptl_sm_sendfrag.h - -# See ompi/mca/common/sm/Makefile.am for an explanation of -# libmca_common_sm.la. 
- -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) -mca_ptl_sm_la_SOURCES = $(sm_SOURCES) -mca_ptl_sm_la_DEPENDENCIES = librecompile.la -mca_ptl_sm_la_LIBADD = librecompile.la \ - $(top_ompi_builddir)/ompi/mca/common/sm/libmca_common_sm.la \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la -mca_ptl_sm_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) librecompile.la -libmca_ptl_sm_la_SOURCES = $(sm_SOURCES) -libmca_ptl_sm_la_DEPENDENCIES = librecompile.la -libmca_ptl_sm_la_LDFLAGS = -module -avoid-version -libmca_ptl_sm_la_LIBADD = librecompile.la - -nodist_librecompile_la_SOURCES = ptl_sm_send_alternate.c -librecompile_la_CPPFLAGS = -DSM_COMMON_BASE_ADDR - -# -# The "send alternate" file is really the same as the "send" file, but -# compiled with a different #define -# -ptl_sm_send_alternate.c: - if test ! -f $@; then \ - ln -sf $(srcdir)/ptl_sm_send.c ptl_sm_send_alternate.c; \ - fi - diff --git a/ompi/mca/ptl/sm/configure.params b/ompi/mca/ptl/sm/configure.params deleted file mode 100644 index 174bb7cf6d..0000000000 --- a/ompi/mca/ptl/sm/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=ptl_sm.c -PARAM_CONFIG_HEADER_FILE="sm_config.h" -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/ptl/sm/ptl_sm.c b/ompi/mca/ptl/sm/ptl_sm.c deleted file mode 100644 index 3c1691a0a4..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm.c +++ /dev/null @@ -1,990 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#ifdef HAVE_STDLIB_H -#include -#endif /* HAVE_STDLIB_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_STAT_H -#include -#endif /* HAVE_SYS_STAT_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#include -#ifdef HAVE_SCHED_H -#include -#endif /* HAVE_SCHED_H */ - -#include "opal/util/output.h" -#include "opal/util/if.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/mca/mpool/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "ompi/mca/common/sm/common_sm_mmap.h" -#include "ptl_sm.h" -#include "orte/util/sys_info.h" -#include "ompi/mca/ptl/sm/ptl_sm_peer.h" -#include "ompi/mca/common/sm/common_sm_mmap.h" -#include 
"orte/util/proc_info.h" -#include "opal/util/printf.h" -#include "ompi/mca/ptl/sm/ptl_sm_sendreq.h" -#include "ompi/class/ompi_fifo.h" -#include "ompi/class/ompi_free_list.h" -#include "opal/threads/mutex.h" -#include "ompi/datatype/datatype.h" -#include "opal/sys/atomic.h" -#include "ompi/proc/proc.h" -#include "ompi/communicator/communicator.h" - -mca_ptl_sm_t mca_ptl_sm[2] = { - { - { - &mca_ptl_sm_component.super, - 20, /* number of elements in the send descriptor cache */ - sizeof(mca_ptl_sm_send_request_t) - - sizeof(mca_ptl_base_send_request_t), /* size of shared memory send - descriptor */ - 1, /* ptl_first_frag_size */ - 0, /* ptl_min_frag_size */ - 0, /* ptl_max_frag_size */ - 0, /* ptl_exclusivity */ - 0, /* ptl_latency */ - 0, /* ptl_bandwidth */ - MCA_PTL_PUT, /* ptl flags */ - mca_ptl_sm_add_procs_same_base_addr, - mca_ptl_sm_del_procs, - mca_ptl_sm_finalize, - mca_ptl_sm_send, /* first fragment send function */ - mca_ptl_sm_send_continue, /* second and subsequent send function */ - NULL, /* get function */ - mca_ptl_sm_matched_same_base_addr, /* function called after match is made */ - mca_ptl_sm_request_alloc, /* initialization routine */ - mca_ptl_sm_request_return - } - }, - { - { - &mca_ptl_sm_component.super, - 20, /* number of elements in the send descriptor cache */ - sizeof(mca_ptl_sm_send_request_t) - - sizeof(mca_ptl_base_send_request_t), /* size of shared memory - send descriptor */ - 1, /* ptl_first_frag_size */ - 0, /* ptl_min_frag_size */ - 0, /* ptl_max_frag_size */ - 0, /* ptl_exclusivity */ - 0, /* ptl_latency */ - 0, /* ptl_bandwidth */ - MCA_PTL_PUT, /* ptl flags */ - mca_ptl_sm_add_procs, - mca_ptl_sm_del_procs, - mca_ptl_sm_finalize, - mca_ptl_sm_send, /* first fragment send function */ - mca_ptl_sm_send_continue, /* second and subsequent send function */ - NULL, /* get function */ - mca_ptl_sm_matched, /* function called after match is made */ - mca_ptl_sm_request_alloc, /* initialization routine */ - mca_ptl_sm_request_return - 
} - } -}; - -/* track information needed to synchronise a Shared Memory PTL module */ -mca_ptl_sm_module_resource_t mca_ptl_sm_module_resource; - - -int mca_ptl_sm_add_procs_same_base_addr( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers, - ompi_bitmap_t* reachability) -{ - int return_code=OMPI_SUCCESS; - size_t i,j,proc,size,n_to_allocate,length; - int n_local_procs,cnt,len; - ompi_proc_t* my_proc; /* pointer to caller's proc structure */ - mca_ptl_sm_t *ptl_sm; - ompi_fifo_t *my_fifos; - ompi_fifo_t * volatile *fifo_tmp; - bool same_sm_base; - ssize_t diff; - volatile char **tmp_ptr; - - /* initializion */ - for(i=0 ; i < nprocs ; i++ ) { - peers[i]=NULL; - } - ptl_sm=(mca_ptl_sm_t *)ptl; - - /* allocate array to hold setup shared memory from all - * other procs */ - mca_ptl_sm_component.sm_proc_connect=(int *) malloc(nprocs*sizeof(int)); - if( NULL == mca_ptl_sm_component.sm_proc_connect ){ - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - - /* initialize and sm_proc_connect*/ - for(proc=0 ; proc < nprocs ; proc++ ) { - mca_ptl_sm_component.sm_proc_connect[proc]=0; - } - - /* get pointer to my proc structure */ - my_proc=ompi_proc_local(); - if( NULL == my_proc ) { - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - - /* Get unique host identifier for each process in the list, - * and idetify procs that are on this host. Add procs on this - * host to shared memory reachbility list. Also, get number - * of local procs in the prcs list. 
*/ - n_local_procs=0; - for( proc=0 ; proc < nprocs; proc++ ) { -#if OMPI_ENABLE_PROGRESS_THREADS == 1 - char path[PATH_MAX]; -#endif - struct mca_ptl_base_peer_t *peer; - - /* check to see if this is me */ - if( my_proc == procs[proc] ) { - mca_ptl_sm_component.my_smp_rank=n_local_procs; - } - - /* check to see if this proc can be reached via shmem (i.e., - if they're on my local host and in my job) */ - else if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid || - 0 == (procs[proc]->proc_flags & OMPI_PROC_FLAG_LOCAL)) { - continue; - } - - /* initialize the peers information */ - peer = peers[proc]=malloc(sizeof(struct mca_ptl_base_peer_t)); - if( NULL == peer ){ - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - peer->peer_smp_rank=n_local_procs+ - mca_ptl_sm_component.num_smp_procs; - -#if OMPI_ENABLE_PROGRESS_THREADS == 1 - sprintf(path, "%s/sm_fifo.%lu", orte_process_info.job_session_dir, - (unsigned long)procs[proc]->proc_name.vpid); - peer->fifo_fd = open(path, O_WRONLY); - if(peer->fifo_fd < 0) { - opal_output(0, "mca_ptl_sm_add_procs: open(%s) failed with errno=%d\n", path, errno); - goto CLEANUP; - } -#endif - n_local_procs++; - mca_ptl_sm_component.sm_proc_connect[proc]=SM_CONNECTED; - } - if( n_local_procs == 0) { - return_code = OMPI_SUCCESS; - goto CLEANUP; - } - - /* lookup shared memory pool */ - if(NULL == mca_ptl_sm_component.sm_mpool) { - mca_ptl_sm_component.sm_mpool = - mca_mpool_base_module_lookup(mca_ptl_sm_component.sm_mpool_name); - if (NULL == mca_ptl_sm_component.sm_mpool) { - mca_ptl_sm_component.sm_mpool = - mca_mpool_base_module_create(mca_ptl_sm_component.sm_mpool_name,NULL,NULL); - } - - /* Sanity check to ensure that we found it */ - if (NULL == mca_ptl_sm_component.sm_mpool) { - return_code = OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - mca_ptl_sm_component.sm_mpool_base = - mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool); - } - - /* make sure that my_smp_rank has been defined */ - 
if( 0xFFFFFFFF == mca_ptl_sm_component.my_smp_rank ) { - return_code=OMPI_ERROR; - goto CLEANUP; - } - - /* see if need to allocate space for extra procs */ - if( 0 > mca_ptl_sm_component.sm_max_procs ) { - - /* no limit */ - if( 0 <= mca_ptl_sm_component.sm_extra_procs ) { - /* limit */ - mca_ptl_sm_component.sm_max_procs=n_local_procs+ - mca_ptl_sm_component.sm_extra_procs; - } else { - /* no limit */ - mca_ptl_sm_component.sm_max_procs=2*n_local_procs; - } - } - n_to_allocate=mca_ptl_sm_component.sm_max_procs; - - /* make sure n_to_allocate is greater than 0 */ - - if ( !mca_ptl_sm[0].ptl_inited ) { - /* set the shared memory offset */ - mca_ptl_sm_component.sm_offset=(ssize_t *) - malloc(n_to_allocate*sizeof(ssize_t)); - if(NULL == mca_ptl_sm_component.sm_offset ) { - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - - /* create a list of peers */ - mca_ptl_sm_component.sm_peers=(struct mca_ptl_base_peer_t**) - malloc(n_to_allocate*sizeof(struct mca_ptl_base_peer_t*)); - if(NULL == mca_ptl_sm_component.sm_peers ) { - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - } - - /* set local proc's smp rank in the peers structure for - * rapid access */ - for( proc=0 ; proc < nprocs; proc++ ) { - struct mca_ptl_base_peer_t* peer = peers[proc]; - if(NULL != peer) { - mca_ptl_sm_component.sm_peers[peer->peer_smp_rank] = peer; - peer->my_smp_rank=mca_ptl_sm_component.my_smp_rank; - } - } - - /* Allocate Shared Memory PTL process coordination - * data structure. 
This will reside in shared memory */ - - /* - * Create backing file - only first time through - */ - if ( !mca_ptl_sm[0].ptl_inited ) { - /* set file name */ - len=asprintf(&(mca_ptl_sm_component.sm_resouce_ctl_file), - "%s/shared_mem_ptl_module.%s",orte_process_info.job_session_dir, - orte_system_info.nodename); - if( 0 > len ) { - goto CLEANUP; - } - - size=sizeof(mca_ptl_sm_module_resource_t); - if(NULL==(mca_ptl_sm_component.mmap_file=mca_common_sm_mmap_init(size, - mca_ptl_sm_component.sm_resouce_ctl_file, - sizeof(mca_ptl_sm_module_resource_t), 0))) - { - opal_output(0, "mca_ptl_sm_add_procs: unable to create shared memory PTL coordinating strucure :: size %ld \n", - size); - return_code=OMPI_ERROR; - goto CLEANUP; - } - - /* set the pointer to the shared memory control structure */ - mca_ptl_sm_component.sm_ctl_header=(mca_ptl_sm_module_resource_t *) - mca_ptl_sm_component.mmap_file->map_seg; - - - /* Allocate a fixed size pointer array for the 2-D Shared memory queues. - * Excess slots will be allocated for future growth. 
One could - * make this array growable, but then one would need to uses mutexes - * for any access to these queues to ensure data consistancy when - * the array is grown */ - - if(0 == mca_ptl_sm_component.my_smp_rank ) { - /* allocate ompi_fifo_t strucutes for each fifo of the queue - * pairs - one per pair of local processes */ - /* check to make sure number of local procs is within the - * specified limits */ - if( ( 0 < mca_ptl_sm_component.sm_max_procs ) && - ( n_local_procs > mca_ptl_sm_component.sm_max_procs) ) { - return_code=OMPI_ERROR; - goto CLEANUP; - } - - /* allocate array of ompi_fifo_t* elements - - * offset relative to base segement is stored, so that - * this can be used by other procs */ - mca_ptl_sm_component.sm_ctl_header->fifo= - mca_ptl_sm_component.sm_mpool->mpool_alloc - (mca_ptl_sm_component.sm_mpool, - n_to_allocate*sizeof(ompi_fifo_t *), - 0, - CACHE_LINE_SIZE, NULL); - if ( NULL == mca_ptl_sm_component.sm_ctl_header->fifo ) { - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - /* initiazlize the pointer array */ - for(i=0 ; i < n_to_allocate ; i++ ) { - mca_ptl_sm_component.sm_ctl_header->fifo[i]=NULL; - } - - /* allocate and initialize the array to hold the virtual address - * of the shared memory base */ - mca_ptl_sm_component.sm_ctl_header->segment_header. - base_shared_mem_segment = ( volatile char **) - mca_ptl_sm_component.sm_mpool->mpool_alloc - (mca_ptl_sm_component.sm_mpool, - n_to_allocate*sizeof(char *), CACHE_LINE_SIZE, 0, NULL); - if ( NULL == mca_ptl_sm_component.sm_ctl_header->segment_header. - base_shared_mem_segment ) { - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - /* initialize the pointer array */ - for(i=0 ; i < n_to_allocate ; i++ ) { - mca_ptl_sm_component.sm_ctl_header->segment_header. 
- base_shared_mem_segment[i]=NULL; - } - - /* allocate and initialize the array of flags indicating - * when the virtual address of the shared memory address - * has been set */ - mca_ptl_sm_component.sm_ctl_header->segment_header. - base_shared_mem_flags = ( int *) - mca_ptl_sm_component.sm_mpool->mpool_alloc - (mca_ptl_sm_component.sm_mpool, - n_to_allocate*sizeof(int), CACHE_LINE_SIZE, 0, NULL); - if ( NULL == mca_ptl_sm_component.sm_ctl_header->segment_header. - base_shared_mem_flags ) { - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - for(i=0 ; i < n_to_allocate ; i++ ) { - mca_ptl_sm_component.sm_ctl_header->segment_header. - base_shared_mem_flags[i]=0; - } - - /* set the addresses to be a relative, so that - * they can be used by other procs */ - mca_ptl_sm_component.sm_ctl_header->fifo= - (volatile ompi_fifo_t **) - ( (char *)(mca_ptl_sm_component.sm_ctl_header->fifo)- - (char *)(mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool)) ); - - mca_ptl_sm_component.sm_ctl_header->segment_header. 
- base_shared_mem_segment=( volatile char **) - ( (char *)(mca_ptl_sm_component.sm_ctl_header-> - segment_header.base_shared_mem_segment) - - (char *)(mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool)) ); - - /* allow other procs to use this shared memory map */ - mca_ptl_sm_component.mmap_file->map_seg->seg_inited=true; - - /* memory barrier to ensure this flag is set before other - * flags are set */ - opal_atomic_mb(); - } - - /* Note: Need to make sure that proc 0 initializes control - * structures before any of the other procs can progress */ - if( 0 != mca_ptl_sm_component.my_smp_rank ) - { - /* spin until local proc 0 initializes the segment */ -#ifdef HAVE_SCHED_YIELD - while(!mca_ptl_sm_component.mmap_file->map_seg->seg_inited) - { sched_yield(); } -#else -#if defined(__WINDOWS__) - sleep(50); /* milliseconds */ -#else - usleep(50000); /* microseconds */ -#endif -#endif /* HAVE_SCHED_YIELD */ - } - - /* set the base of the shared memory segment, and flag - * indicating that it is set */ - tmp_ptr=(volatile char **) - ( (char *)(mca_ptl_sm_component.sm_ctl_header->segment_header. - base_shared_mem_segment) + - (long )(mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool)) ); - tmp_ptr[mca_ptl_sm_component.my_smp_rank]= - mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool); - /* memory barrier to ensure this flag is set before other - * flags are set */ - opal_atomic_mb(); - - mca_ptl_sm_component.sm_ctl_header->segment_header. - base_shared_mem_flags[mca_ptl_sm_component.my_smp_rank]=1; - - /* - * initialize the array of fifo's "owned" by this process - * The virtual addresses are valid only in the sender's - * address space - unless the base of the shared memory - * segment is mapped at the same location in the reader's - * virtual address space. 
- */ - my_fifos=( ompi_fifo_t *) - mca_ptl_sm_component.sm_mpool->mpool_alloc - (mca_ptl_sm_component.sm_mpool, - n_to_allocate*sizeof(ompi_fifo_t), CACHE_LINE_SIZE, 0, NULL); - if ( NULL == my_fifos ) { - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - - for( j=0 ; j < n_to_allocate ; j++ ) { - my_fifos[j].head=OMPI_CB_FREE; - my_fifos[j].tail=OMPI_CB_FREE; - opal_atomic_unlock(&(my_fifos[j].head_lock)); - opal_atomic_unlock(&(my_fifos[j].tail_lock)); - } - fifo_tmp=(ompi_fifo_t * volatile *) - ( (char *)(mca_ptl_sm_component.sm_ctl_header->fifo) + - (long)(mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool)) ); - /* RLG : need memory barrier */ - fifo_tmp[mca_ptl_sm_component.my_smp_rank]=my_fifos; - - /* cache the pointer to the 2d fifo array. These addresses - * are valid in the current process space */ - mca_ptl_sm_component.fifo=(ompi_fifo_t **) - malloc(sizeof(ompi_fifo_t *)*n_to_allocate); - if( NULL == mca_ptl_sm_component.fifo ) { - return_code=OMPI_ERROR; - goto CLEANUP; - } - mca_ptl_sm_component.fifo[mca_ptl_sm_component.my_smp_rank]=my_fifos; - } - - /* cache the pointers to the rest of the fifo arrays */ - fifo_tmp=(ompi_fifo_t * volatile *) - ( (char *)(mca_ptl_sm_component.sm_ctl_header->fifo) + - (long)(mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool)) ); - for( j=mca_ptl_sm_component.num_smp_procs ; j < - mca_ptl_sm_component.num_smp_procs+n_local_procs ; j++ ) { - - /* spin until this element is allocated */ - while ( NULL == fifo_tmp[j] ) - { ; } - - tmp_ptr=(volatile char **) - ( (char *)mca_ptl_sm_component.sm_ctl_header-> - segment_header.base_shared_mem_segment + - (long)mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool)); - diff= tmp_ptr[mca_ptl_sm_component.my_smp_rank]-tmp_ptr[j]; - mca_ptl_sm_component.fifo[j]= - ( ompi_fifo_t *)( (char *)fifo_tmp[j]+diff); - mca_ptl_sm_component.sm_offset[j]=tmp_ptr[j]- - tmp_ptr[mca_ptl_sm_component.my_smp_rank]; - - } - - 
/* initialize some of the free-lists */ - if( !mca_ptl_sm[0].ptl_inited ) { - /* some initialization happens only the first time this routine - * is called, i.e. when ptl_inited is false */ - - /* initialize fragment descriptor free list */ - - /* - * first fragment - */ - - /* allocation will be for the fragment descriptor, payload buffer, - * and padding to ensure proper alignment can be acheived */ - length=sizeof(mca_ptl_sm_frag_t)+mca_ptl_sm_component.fragment_alignment+ - mca_ptl_sm_component.first_fragment_size; - - ompi_free_list_init(&mca_ptl_sm_component.sm_first_frags, length, - OBJ_CLASS(mca_ptl_sm_frag_t), - mca_ptl_sm_component.sm_first_frag_free_list_num, - mca_ptl_sm_component.sm_first_frag_free_list_max, - mca_ptl_sm_component.sm_first_frag_free_list_inc, - mca_ptl_sm_component.sm_mpool); /* use shared-memory pool */ - - /* - * second and beyond fragments - */ - - /* allocation will be for the fragment descriptor, payload buffer, - * and padding to ensure proper alignment can be acheived */ - length=sizeof(mca_ptl_sm_frag_t)+mca_ptl_sm_component.fragment_alignment+ - mca_ptl_sm_component.max_fragment_size; - - ompi_free_list_init(&mca_ptl_sm_component.sm_second_frags, length, - OBJ_CLASS(mca_ptl_sm_second_frag_t), - mca_ptl_sm_component.sm_second_frag_free_list_num, - mca_ptl_sm_component.sm_second_frag_free_list_max, - mca_ptl_sm_component.sm_second_frag_free_list_inc, - mca_ptl_sm_component.sm_mpool); /* use shared-memory pool */ - - /* set up mca_ptl_sm_component.list_smp_procs_same_base_addr */ - mca_ptl_sm_component.list_smp_procs_same_base_addr=(int *) - malloc(mca_ptl_sm_component.sm_max_procs*sizeof(int)); - if( NULL == mca_ptl_sm_component.list_smp_procs_same_base_addr ){ - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - - /* set up mca_ptl_sm_component.list_smp_procs_different_base_addr */ - mca_ptl_sm_component.list_smp_procs_different_base_addr=(int *) - malloc(mca_ptl_sm_component.sm_max_procs*sizeof(int)); - if( NULL == 
mca_ptl_sm_component.list_smp_procs_different_base_addr ){ - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - - /* set flag indicating ptl has been inited */ - ptl_sm->ptl_inited=true; - } - - /* set connectivity */ - cnt=0; - for(proc = 0 ; proc < nprocs ; proc++ ) { - - struct mca_ptl_base_peer_t* peer = peers[proc]; - if(peer == NULL) - continue; - - tmp_ptr=(volatile char **) - ( (char *)mca_ptl_sm_component.sm_ctl_header-> - segment_header.base_shared_mem_segment + - (long)mca_ptl_sm_component.sm_mpool->mpool_base(mca_ptl_sm_component.sm_mpool)); - same_sm_base=(tmp_ptr[peer->peer_smp_rank] == - tmp_ptr[mca_ptl_sm_component.my_smp_rank]); - - if( SM_CONNECTED == mca_ptl_sm_component.sm_proc_connect[proc] ) { - if( same_sm_base ){ - - /* don't count if same process */ - if( (mca_ptl_sm_component.num_smp_procs+cnt ) == - mca_ptl_sm_component.my_smp_rank) { - cnt++; - continue; - } - /* set up the list of local processes with the same base - * shared memory virtual address as this process */ - mca_ptl_sm_component.list_smp_procs_same_base_addr - [mca_ptl_sm_component.num_smp_procs_same_base_addr]= - cnt; - mca_ptl_sm_component.num_smp_procs_same_base_addr++; - cnt++; - /* add this proc to shared memory accessability list */ - return_code=ompi_bitmap_set_bit(reachability,proc); - if( OMPI_SUCCESS != return_code ){ - goto CLEANUP; - } - } else { - /* set up the list of local processes with the same base - * shared memory virtual address as this process */ - mca_ptl_sm_component.list_smp_procs_different_base_addr - [mca_ptl_sm_component.num_smp_procs_different_base_addr]= - cnt; - mca_ptl_sm_component.num_smp_procs_different_base_addr++; - cnt++; - mca_ptl_sm_component.sm_proc_connect[proc]= - SM_CONNECTED_DIFFERENT_BASE_ADDR; - } - } - } - - /* update the local smp process count */ - mca_ptl_sm_component.num_smp_procs+=n_local_procs; - -CLEANUP: - return return_code; -} - -/* Note:: this routine assumes that mca_ptl_sm_add_procs_same_base_addr - * has 
already been called to set up data structures needed by this - * routine */ -int mca_ptl_sm_add_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers, - ompi_bitmap_t* reachability) -{ - int return_code = OMPI_SUCCESS, tmp_cnt; - uint32_t proc, n_local_procs; - - /* initializion */ - for(proc=0 ; proc < nprocs ; proc++ ) { - peers[proc]=NULL; - } - - /* figure out total number of local procs in current set */ - tmp_cnt=0; - for(proc = 0 ; proc < nprocs ; proc++ ) { - if( (SM_CONNECTED_DIFFERENT_BASE_ADDR == - mca_ptl_sm_component.sm_proc_connect[proc]) || - (SM_CONNECTED == - mca_ptl_sm_component.sm_proc_connect[proc]) ) { - tmp_cnt++; - } - } - /* set connectivity */ - n_local_procs=0; - for(proc = 0 ; proc < nprocs ; proc++ ) { - if( (SM_CONNECTED_DIFFERENT_BASE_ADDR == - mca_ptl_sm_component.sm_proc_connect[proc]) || - (SM_CONNECTED == - mca_ptl_sm_component.sm_proc_connect[proc]) ) { - n_local_procs++; - } - - if( (SM_CONNECTED_DIFFERENT_BASE_ADDR == - mca_ptl_sm_component.sm_proc_connect[proc]) ) { - - /* add this proc to shared memory accessability list */ - return_code=ompi_bitmap_set_bit(reachability,proc); - if( OMPI_SUCCESS != return_code ){ - goto CLEANUP; - } - - /* initialize the peers information */ - peers[proc]=malloc(sizeof(struct mca_ptl_base_peer_t)); - if( NULL == peers[proc] ){ - return_code=OMPI_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - peers[proc]->my_smp_rank=mca_ptl_sm_component.my_smp_rank; - /* subtract tmp_cnt, since mca_ptl_sm_add_procs_same_base_addr - * already added these into num_smp_procs */ - peers[proc]->peer_smp_rank=n_local_procs+ - mca_ptl_sm_component.num_smp_procs-tmp_cnt; - n_local_procs++; - } - } - -CLEANUP: - /* free local memory */ - if(mca_ptl_sm_component.sm_proc_connect){ - free(mca_ptl_sm_component.sm_proc_connect); - mca_ptl_sm_component.sm_proc_connect=NULL; - } - - return return_code; -} - -int mca_ptl_sm_del_procs( - struct 
mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers) -{ - return OMPI_SUCCESS; -} - - -int mca_ptl_sm_finalize(struct mca_ptl_base_module_t* ptl) -{ - return OMPI_SUCCESS; -} - - -int mca_ptl_sm_request_alloc(struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_send_request_t* request) -{ - mca_ptl_sm_send_request_t *sm_request; - opal_list_item_t* item; - int rc; - - /* allocate shared memory, first fragment */ - OMPI_FREE_LIST_GET(&(mca_ptl_sm_component.sm_first_frags),item,rc); - if( OMPI_SUCCESS != rc ) { - return rc; - } - - /* associate this fragment with the send descriptor */ - sm_request=(mca_ptl_sm_send_request_t *)request; - sm_request->req_frag=(mca_ptl_sm_frag_t *)item; - - return OMPI_SUCCESS; -} - - -void mca_ptl_sm_request_return(struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_send_request_t* request) -{ - mca_ptl_sm_send_request_t *sm_request; - opal_list_item_t* item; - - /* return the fragment descriptor to the free list */ - sm_request=(mca_ptl_sm_send_request_t *)request; - item=(opal_list_item_t *)sm_request->req_frag; - OMPI_FREE_LIST_RETURN(&(mca_ptl_sm_component.sm_first_frags),item); - -} - -/* - * Initiate a send. The fragment descriptor allocated with the - * send requests. If the send descriptor is NOT obtained from - * the cache, this implementation will ONLY return an error code. - * If we don't do this, then, because we rely on memory ordering - * to provide the required MPI message ordering, we would need to - * add logic to check and see if there are any other sends waiting - * on resrouces to progress and complete all of them, before the - * current one can continue. To reduce latency, and because the - * actual amount of shared memory resrouces can be set at run time, - * this ptl implementation does not do this. Initialize the - * fragment and foward on to the peer. 
- * - * NOTE: this routine assumes that only one sending thread will be accessing - * the send descriptor at a time. - */ - -int mca_ptl_sm_send( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t* sendreq, - size_t offset, - size_t size, - int flags) -{ - mca_ptl_sm_send_request_t *sm_request; - mca_ptl_sm_frag_t *send_frag; - int my_local_smp_rank, peer_local_smp_rank; - int return_status=OMPI_SUCCESS; - ompi_fifo_t *send_fifo; - mca_ptl_base_header_t* hdr; - void *sm_data_ptr ; - - /* cast to shared memory send descriptor */ - sm_request=(mca_ptl_sm_send_request_t *)sendreq; - - /* determine if send descriptor is obtained from the cache. If - * so, all the memory resource needed have been obtained */ - if( !sm_request->super.req_cached) { - /* in this ptl, we will only use the cache, or fail */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - send_frag = sm_request->req_frag; - - /* if needed, pack data in payload buffer */ - if( 0 < size ) { - ompi_convertor_t *convertor; - unsigned int iov_count; - size_t max_data; - int free_after=0; - struct iovec address; - - convertor = &sendreq->req_send.req_convertor; - sm_data_ptr=sm_request->req_frag->buff; - - /* set up the shared memory iovec */ - address.iov_base=sm_data_ptr; - address.iov_len= (size < send_frag->buff_length) ? 
size : send_frag->buff_length; - - iov_count=1; - max_data=address.iov_len; - return_status=ompi_convertor_pack(convertor,&address,&iov_count, - &max_data, &free_after); - if( 0 > return_status ) { - return OMPI_ERROR; - } - size = max_data; - } - - /* fill in the fragment descriptor */ - /* get pointer to the fragment header */ - hdr = &(send_frag->super.frag_base.frag_header); - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_MATCH; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; - - /* update the offset within the payload */ - sendreq->req_offset += size; - - /* - * update the fragment descriptor - */ - send_frag->send_req = sendreq; - send_frag->send_offset = offset; - send_frag->super.frag_base.frag_size=size; - - /* - * post the descriptor in the queue - post with the relative - * address - */ - /* see if queues are allocated */ - my_local_smp_rank=ptl_peer->my_smp_rank; - peer_local_smp_rank=ptl_peer->peer_smp_rank; - - send_fifo=&(mca_ptl_sm_component.fifo - [my_local_smp_rank][peer_local_smp_rank]); - - /* thread lock */ - if(opal_using_threads()) - opal_atomic_lock(&send_fifo->head_lock); - if(OMPI_CB_FREE == send_fifo->head) { - /* no queues have been allocated - allocate now */ - return_status=ompi_fifo_init_same_base_addr( - mca_ptl_sm_component.size_of_cb_queue, - mca_ptl_sm_component.cb_lazy_free_freq, - /* at this stage we are not doing anything with memory - * locality */ - 0,0,0, - send_fifo, mca_ptl_sm_component.sm_mpool); - if( return_status != OMPI_SUCCESS ) { - if(opal_using_threads()) - opal_atomic_unlock(&(send_fifo->head_lock)); - 
return return_status; - } - } - - /* post descriptor */ - return_status=ompi_fifo_write_to_head_same_base_addr(sm_request->req_frag, - send_fifo, mca_ptl_sm_component.sm_mpool); - if( 0 <= return_status ) { - MCA_PTL_SM_SIGNAL_PEER(ptl_peer); - return_status=OMPI_SUCCESS; - } - if(opal_using_threads()) - opal_atomic_unlock(&send_fifo->head_lock); - - /* if this is the entire message - signal request is complete */ - if(sendreq->req_send.req_bytes_packed == size && - sendreq->req_send.req_send_mode != MCA_PML_BASE_SEND_SYNCHRONOUS) { - ompi_request_complete( &(sendreq->req_send.req_base.req_ompi) ); - } - - /* return */ - return return_status; -} - -/* - * Continue a send. Second fragment and beyond. - * - * NOTE: this routine assumes that only one sending thread will be accessing - * the send descriptor at a time. - */ - -int mca_ptl_sm_send_continue( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t* sendreq, - size_t offset, - size_t size, - int flags) -{ - mca_ptl_sm_send_request_t *sm_request; - int my_local_smp_rank, peer_local_smp_rank, return_code; - int return_status=OMPI_SUCCESS, free_after=0; - ompi_fifo_t *send_fifo; - mca_ptl_base_header_t* hdr; - void *sm_data_ptr ; - opal_list_item_t* item; - mca_ptl_sm_second_frag_t *send_frag; - ompi_convertor_t *convertor; - struct iovec address; - unsigned int iov_count; - size_t max_data; - - /* cast to shared memory send descriptor */ - sm_request=(mca_ptl_sm_send_request_t *)sendreq; - - /* obtain fragment descriptor and payload from free list */ - OMPI_FREE_LIST_GET(&mca_ptl_sm_component.sm_second_frags, item, return_code); - - /* if we don't get a fragment descriptor, return w/o - * updating any counters. 
The PML will re-issue the - * request */ - if(NULL == (send_frag = (mca_ptl_sm_second_frag_t *)item)){ - return return_code; - } - - /* pack data in payload buffer */ - convertor = &sendreq->req_send.req_convertor; - sm_data_ptr=send_frag->buff; - - /* set up the shared memory iovec */ - address.iov_base=sm_data_ptr; - address.iov_len=(size < send_frag->buff_length) ? size : send_frag->buff_length; - - iov_count=1; - max_data=address.iov_len; - return_status=ompi_convertor_pack(convertor,&address,&iov_count, - &max_data, &free_after); - if( 0 > return_status ) { - return OMPI_ERROR; - } - size = max_data; - - /* fill in the fragment descriptor */ - /* get pointer to the fragment header */ - hdr = &(send_frag->super.frag_base.frag_header); - - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; - hdr->hdr_frag.hdr_src_ptr.pval = sendreq; - /* set offset into the "packed" user send buffer */ - hdr->hdr_frag.hdr_frag_offset=offset; - send_frag->super.frag_request= - ((mca_ptl_base_recv_frag_t *)(sm_request->req_frag))-> - frag_request; - - /* update the offset within the payload */ - sendreq->req_offset += size; - - /* - * update the fragment descriptor - */ - send_frag->send_req = sendreq; - send_frag->send_offset = offset; - send_frag->super.frag_base.frag_size=size; - - /* - * post the descriptor in the queue - post with the relative - * address - */ - /* see if queues are allocated */ - my_local_smp_rank=ptl_peer->my_smp_rank; - peer_local_smp_rank=ptl_peer->peer_smp_rank; - send_fifo=&(mca_ptl_sm_component.fifo - [my_local_smp_rank][peer_local_smp_rank]); - - /* since the first fragment has already been posted, - * the queue has already been initialized, so no need to check */ - - /* lock for thread safety - using atomic lock, not mutex, since - * we need shared memory access to these lock, and in some pthread - * implementation, such mutex's don't work correctly */ - if(opal_using_threads()) - opal_atomic_lock(&send_fifo->head_lock); - - /* post descriptor */ - 
return_status=ompi_fifo_write_to_head_same_base_addr(send_frag, - send_fifo, mca_ptl_sm_component.sm_mpool); - if( 0 <= return_status ) { - MCA_PTL_SM_SIGNAL_PEER(ptl_peer); - return_status=OMPI_SUCCESS; - } - - /* release thread lock */ - if(opal_using_threads()) - opal_atomic_unlock(&send_fifo->head_lock); - return return_status; -} diff --git a/ompi/mca/ptl/sm/ptl_sm.h b/ompi/mca/ptl/sm/ptl_sm.h deleted file mode 100644 index a7be353f3b..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm.h +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_SM_H -#define MCA_PTL_SM_H - -#include -#include -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#include "ompi/class/ompi_free_list.h" -#include "ompi/class/ompi_bitmap.h" -#include "ompi/class/ompi_fifo.h" -#include "opal/event/event.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/mca/common/sm/common_sm_mmap.h" -#include "ompi/mca/ptl/sm/ptl_sm_peer.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Shared Memory resource managment - */ -struct mca_ptl_sm_module_resource_t { - /* base control structures */ - mca_common_sm_file_header_t segment_header; - - /* fifo queues - offsets relative to the base of the share memory - * segment will be stored here */ - volatile ompi_fifo_t **fifo; -}; -typedef struct mca_ptl_sm_module_resource_t mca_ptl_sm_module_resource_t; -extern mca_ptl_sm_module_resource_t mca_ptl_sm_module_resource; - -#define SM_CONNECTED 1 -#define SM_CONNECTED_SAME_BASE_ADDR 2 -#define SM_CONNECTED_DIFFERENT_BASE_ADDR 3 - -#if OMPI_ENABLE_PROGRESS_THREADS == 1 -#define DATA (char)0 -#define DONE (char)1 -#endif -/** - * Shared Memory (SM) PTL module. 
- */ -struct mca_ptl_sm_component_t { - mca_ptl_base_component_1_0_0_t super; /**< base PTL component */ - int sm_first_frag_free_list_num; /**< initial size of free lists */ - int sm_first_frag_free_list_max; /**< maximum size of free lists */ - int sm_first_frag_free_list_inc; /**< number of elements to alloc when growing free lists */ - int sm_second_frag_free_list_num; /**< initial size of free lists */ - int sm_second_frag_free_list_max; /**< maximum size of free lists */ - int sm_second_frag_free_list_inc; /**< number of elements to alloc when growing free lists */ - int sm_max_procs; /**< upper limit on the number of processes using the shared memory pool */ - int sm_extra_procs; /**< number of extra procs to allow */ - char* sm_mpool_name; /**< name of shared memory pool module */ - mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */ - void* sm_mpool_base; /**< base address of shared memory pool */ - size_t first_fragment_size; /**< first fragment size */ - size_t max_fragment_size; /**< maximum (second and - beyone) fragment size */ - size_t fragment_alignment; /**< fragment alignment */ - opal_mutex_t sm_lock; - char* sm_resouce_ctl_file; /**< name of shared memory file used - to coordinate resource usage */ - mca_common_sm_mmap_t *mmap_file; /**< description of mmap'ed - file */ - mca_ptl_sm_module_resource_t *sm_ctl_header; /* control header in - shared memory */ - ompi_fifo_t **fifo; /**< cached copy of the pointer to the 2D - fifo array. The address in the shared - memory segment sm_ctl_header is a relative, - but this one, in process private memory, is - a real virtual address */ - size_t size_of_cb_queue; /**< size of each circular buffer queue array */ - size_t cb_lazy_free_freq; /**< frequency of lazy free */ - ssize_t *sm_offset; /**< offset to be applied to shared memory - addresses, per local process value */ - int *sm_proc_connect; /* scratch array used by the 0'th ptl to - * set indicate sm connectivty. 
Used by - * the 1'st ptl */ - size_t num_smp_procs; /**< current number of smp procs on this - host */ - int num_smp_procs_same_base_addr; /* number of procs with same - base shared memory virtual - address as this process */ - int num_smp_procs_different_base_addr; /* number of procs with - different base shared memory - virtual address as this - process */ - int *list_smp_procs_same_base_addr; /* number of procs with same - base shared memory virtual - address as this process */ - int *list_smp_procs_different_base_addr; /* number of procs with different - base shared memory virtual - address as this process */ - uint32_t my_smp_rank; /**< My SMP process rank. Used for accessing - * SMP specfic data structures. */ - ompi_free_list_t sm_first_frags; /**< free list of sm first - fragments */ - ompi_free_list_t sm_second_frags; /**< free list of sm second - and above fragments */ - ompi_free_list_t sm_send_requests; /**< free list of sm send requests -- sendreq + sendfrag */ - ompi_free_list_t sm_first_frags_to_progress; /**< list of first - fragments that are - awaiting resources */ - opal_mutex_t sm_pending_ack_lock; - opal_list_t sm_pending_ack; /**< list of fragmnent that need to be - acked */ - - struct mca_ptl_base_peer_t **sm_peers; -#if OMPI_ENABLE_PROGRESS_THREADS == 1 - char sm_fifo_path[PATH_MAX]; /**< path to fifo used to signal this process */ - int sm_fifo_fd; /**< file descriptor corresponding to opened fifo */ - opal_thread_t sm_fifo_thread; -#endif -}; -typedef struct mca_ptl_sm_component_t mca_ptl_sm_component_t; -extern mca_ptl_sm_component_t mca_ptl_sm_component; - -/** - * Register shared memory module parameters with the MCA framework - */ -extern int mca_ptl_sm_component_open(void); - -/** - * Any final cleanup before being unloaded. - */ -extern int mca_ptl_sm_component_close(void); - -/** - * SM module initialization. - * - * @param num_ptls (OUT) Number of PTLs returned in PTL array. 
- * @param enable_progress_threads (IN) Flag indicating whether PTL is allowed to have progress threads - * @param enable_mpi_threads (IN) Flag indicating whether PTL must support multilple simultaneous invocations from different threads - * - */ -extern mca_ptl_base_module_t** mca_ptl_sm_component_init( - int *num_ptls, - bool enable_progress_threads, - bool enable_mpi_threads -); - -/** - * shared memory component control. - */ -extern int mca_ptl_sm_component_control( - int param, - void* value, - size_t size -); - -/** - * shared memory component progress. - */ -extern int mca_ptl_sm_component_progress( - mca_ptl_tstamp_t tstamp -); - -/** - * SM PTL Interface - */ -struct mca_ptl_sm_t { - mca_ptl_base_module_t super; /**< base PTL interface */ - - bool ptl_inited; /**< flag indicating if ptl has been inited */ -}; -typedef struct mca_ptl_sm_t mca_ptl_sm_t; - -extern mca_ptl_sm_t mca_ptl_sm[2]; - - -/** - * Cleanup any resources held by the PTL. - * - * @param ptl PTL instance. - * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_ptl_sm_finalize( - struct mca_ptl_base_module_t* ptl -); - - -/** - * PML->PTL notification of change in the process list. - * PML->PTL Notification that a receive fragment has been matched. - * Called for message that is send from process with the virtual - * address of the shared memory segment being different than that of - * the receiver. - * - * @param ptl (IN) - * @param proc (IN) - * @param peer (OUT) - * @return OMPI_SUCCESS or error status on failure. - * - */ - -extern int mca_ptl_sm_add_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers, - struct ompi_bitmap_t* reachability -); - - -/** - * PML->PTL notification of change in the process list. - * PML->PTL Notification that a receive fragment has been matched. 
- * Called for message that is send from process with the virtual - * address of the shared memory segment being the same as that of - * the receiver. - * - * @param ptl (IN) - * @param proc (IN) - * @param peer (OUT) - * @return OMPI_SUCCESS or error status on failure. - * - */ - -extern int mca_ptl_sm_add_procs_same_base_addr( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers, - ompi_bitmap_t* reachability -); - - -/** - * PML->PTL notification of change in the process list. - * - * @param ptl (IN) PTL instance - * @param proc (IN) Peer process - * @param peer (IN) Peer addressing information. - * @return Status indicating if cleanup was successful - * - */ -extern int mca_ptl_sm_del_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers -); - -/** - * PML->PTL Allocate a send request from the PTL modules free list. - * - * @param ptl (IN) PTL instance - * @param request (OUT) Pointer to allocated request. - * @return Status indicating if allocation was successful. - * - */ -extern int mca_ptl_sm_request_alloc( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* -); - -/** - * PML->PTL Return a send request to the PTL modules free list. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - */ -extern void mca_ptl_sm_request_return( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* -); - -/** - * PML->PTL Notification that a receive fragment has been matched. - * Called for message that is send from process with the virtual - * address of the shared memory segment being the same as that of - * the receiver. 
- * - * @param ptl (IN) PTL instance - * @param recv_frag (IN) Receive fragment - * - */ -extern void mca_ptl_sm_matched_same_base_addr( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* frag -); - -/** - * PML->PTL notification of change in the process list. - * - * @param ptl (IN) PTL instance - * @param proc (IN) Peer process - * @param peer (IN) Peer addressing information. - * @return Status indicating if cleanup was successful - * - */ -extern int mca_ptl_sm_del_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t **peers -); - -/** - * PML->PTL Allocate a send request from the PTL modules free list. - * - * @param ptl (IN) PTL instance - * @param request (OUT) Pointer to allocated request. - * @return Status indicating if allocation was successful. - * - */ -extern int mca_ptl_sm_request_alloc( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* -); - -/** - * PML->PTL Return a send request to the PTL modules free list. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - */ -extern void mca_ptl_sm_request_return( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* -); - -/** - * PML->PTL Notification that a receive fragment has been matched. - * Called for message that is send from process with the virtual - * address of the shared memory segment being different than that of - * the receiver. - * - * @param ptl (IN) PTL instance - * @param recv_frag (IN) Receive fragment - * - */ -extern void mca_ptl_sm_matched( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* frag -); - -/** - * PML->PTL Notification that a receive fragment has been matched. - * Called for message that is send from process with the virtual - * address of the shared memory segment being the same as that of - * the receiver. 
- * - * @param ptl (IN) PTL instance - * @param recv_frag (IN) Receive fragment - * - */ -extern void mca_ptl_sm_matched_common_base_addr( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* frag -); - -/** - * PML->PTL Initiate a send of the specified size. - * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param send_request (IN/OUT) Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t) - * @param size (IN) Number of bytes PML is requesting PTL to deliver - * @param flags (IN) Flags that should be passed to the peer via the message header. - * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - */ -extern int mca_ptl_sm_send( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t*, - size_t offset, - size_t size, - int flags -); - -/** - * PML->PTL send second and subsequent fragments - * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param send_request (IN/OUT) Send request (allocated by PML via mca_ptl_base_request_alloc_fn_t) - * @param size (IN) Number of bytes PML is requesting PTL to deliver - * @param flags (IN) Flags that should be passed to the peer via the message header. 
- * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - */ -extern int mca_ptl_sm_send_continue( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t*, - size_t offset, - size_t size, - int flags -); - - -#if OMPI_ENABLE_PROGRESS_THREADS == 1 -void mca_ptl_sm_component_event_thread(opal_object_t*); -#endif - - -#if OMPI_ENABLE_PROGRESS_THREADS == 1 -#define MCA_PTL_SM_SIGNAL_PEER(peer) \ -{ \ - unsigned char cmd = DATA; \ - if(write(peer->fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { \ - opal_output(0, "mca_ptl_sm_send: write fifo failed: errno=%d\n", errno); \ - } \ -} -#else -#define MCA_PTL_SM_SIGNAL_PEER(peer) -#endif - - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/ptl/sm/ptl_sm_address.h b/ompi/mca/ptl/sm/ptl_sm_address.h deleted file mode 100644 index fd15d51fc5..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_address.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_ADDRESS_H -#define MCA_PTL_ADDRESS_H - - -/* - * macro to convert virtual address, to address relative to a base - * offset - */ -#define RELATIVE_ADDRESS(A,B) (void *) ( (char *)(A) - \ - (size_t)(B) ) - - -#endif /* !ADDRESS */ - diff --git a/ompi/mca/ptl/sm/ptl_sm_component.c b/ompi/mca/ptl/sm/ptl_sm_component.c deleted file mode 100644 index f9d4e0b34a..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_component.c +++ /dev/null @@ -1,619 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#ifdef HAVE_FCNTL_H -#include -#endif /* HAVE_FCNTL_H */ -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#ifdef HAVE_SYS_MMAN_H -#include -#endif /* HAVE_SYS_MMAN_H */ -#ifdef HAVE_SYS_STAT_H -#include /* for mkfifo */ -#endif /* HAVE_SYS_STAT_H */ -#include "ompi/constants.h" -#include "opal/sys/cache.h" -#include "opal/event/event.h" -#include "opal/util/if.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "orte/util/sys_info.h" -#include "orte/util/proc_info.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "opal/mca/base/mca_base_param.h" -#include "ompi/mca/ptl/sm/ptl_sm.h" -#include "ompi/mca/mpool/base/base.h" -#include "orte/mca/oob/base/base.h" -#include "ptl_sm.h" -#include "ptl_sm_sendreq.h" -#include "ptl_sm_sendfrag.h" -#include "ptl_sm_recvfrag.h" -#include "ompi/mca/common/sm/common_sm_mmap.h" - - -/* - * Shared Memory (SM) component instance. 
- */ - -mca_ptl_sm_component_t mca_ptl_sm_component = { - { /* super is being filled in */ - /* First, the mca_base_component_t struct containing meta information - about the component itself */ - { - /* Indicate that we are a pml v1.0.0 component (which also implies a - specific MCA version) */ - MCA_PTL_BASE_VERSION_1_0_0, - "sm", /* MCA component name */ - OMPI_MAJOR_VERSION, /* MCA component major version */ - OMPI_MINOR_VERSION, /* MCA component minor version */ - OMPI_RELEASE_VERSION, /* MCA component release version */ - mca_ptl_sm_component_open, /* component open */ - mca_ptl_sm_component_close /* component close */ - }, - - /* Next the MCA v1.0.0 component meta data */ - { - /* Whether the component is checkpointable or not */ - false - }, - - mca_ptl_sm_component_init, - mca_ptl_sm_component_control, - mca_ptl_sm_component_progress, - } /* end super */ -}; - - -/* - * utility routines for parameter registration - */ - -static inline char* mca_ptl_sm_param_register_string( - const char* param_name, - const char* default_value) -{ - char *param_value; - int id = mca_base_param_register_string("ptl","sm",param_name,NULL,default_value); - mca_base_param_lookup_string(id, ¶m_value); - return param_value; -} - -static inline int mca_ptl_sm_param_register_int( - const char* param_name, - int default_value) -{ - int id = mca_base_param_register_int("ptl","sm",param_name,NULL,default_value); - int param_value = default_value; - mca_base_param_lookup_int(id,¶m_value); - return param_value; -} - - -/* - * Called by MCA framework to open the component, registers - * component parameters. 
- */ - -int mca_ptl_sm_component_open(void) -{ - /* register SM component parameters */ - mca_ptl_sm_component.sm_first_frag_free_list_num = - mca_ptl_sm_param_register_int("first_frag_free_list_num", 256); - mca_ptl_sm_component.sm_first_frag_free_list_max = - mca_ptl_sm_param_register_int("first_frag_free_list_max", -1); - mca_ptl_sm_component.sm_first_frag_free_list_inc = - mca_ptl_sm_param_register_int("first_frag_free_list_inc", 256); - mca_ptl_sm_component.sm_second_frag_free_list_num = - mca_ptl_sm_param_register_int("second_frag_free_list_num", 256); - mca_ptl_sm_component.sm_second_frag_free_list_max = - mca_ptl_sm_param_register_int("second_frag_free_list_max", -1); - mca_ptl_sm_component.sm_second_frag_free_list_inc = - mca_ptl_sm_param_register_int("second_frag_free_list_inc", 256); - mca_ptl_sm_component.sm_max_procs = - mca_ptl_sm_param_register_int("max_procs", -1); - mca_ptl_sm_component.sm_extra_procs = - mca_ptl_sm_param_register_int("sm_extra_procs", -1); - mca_ptl_sm_component.sm_mpool_name = - mca_ptl_sm_param_register_string("mpool", "sm"); - mca_ptl_sm_component.first_fragment_size = - mca_ptl_sm_param_register_int("first_fragment_size", 1024); - mca_ptl_sm_component.max_fragment_size = - mca_ptl_sm_param_register_int("max_fragment_size", 8*1024); - mca_ptl_sm_component.fragment_alignment = - mca_ptl_sm_param_register_int("fragment_alignment", - CACHE_LINE_SIZE); - mca_ptl_sm_component.size_of_cb_queue = - mca_ptl_sm_param_register_int("size_of_cb_queue", 128); - mca_ptl_sm_component.cb_lazy_free_freq = - mca_ptl_sm_param_register_int("cb_lazy_free_freq", 120); - /* make sure that queue size and lazy free frequency are consistent - - * want to make sure that slots are freed at a rate they can be - * reused, w/o allocating extra new circular buffer fifo arrays */ - if( (float)(mca_ptl_sm_component.cb_lazy_free_freq) >= - 0.95*(float)(mca_ptl_sm_component.size_of_cb_queue) ) { - /* upper limit */ - mca_ptl_sm_component.cb_lazy_free_freq= - 
(int)(0.95*(float)(mca_ptl_sm_component.size_of_cb_queue)); - /* lower limit */ - if( 0>= mca_ptl_sm_component.cb_lazy_free_freq ) { - mca_ptl_sm_component.cb_lazy_free_freq=1; - } - } - - /* default number of extra procs to allow for future growth */ - mca_ptl_sm_component.sm_extra_procs = - mca_ptl_sm_param_register_int("sm_extra_procs", 2); - - /* initialize objects */ - OBJ_CONSTRUCT(&mca_ptl_sm_component.sm_lock, opal_mutex_t); - OBJ_CONSTRUCT(&mca_ptl_sm_component.sm_send_requests, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_ptl_sm_component.sm_first_frags, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_ptl_sm_component.sm_second_frags, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_ptl_sm_component.sm_pending_ack_lock, opal_mutex_t); - OBJ_CONSTRUCT(&mca_ptl_sm_component.sm_pending_ack, opal_list_t); - - return OMPI_SUCCESS; -} - - -/* - * component cleanup - sanity checking of queue lengths - */ - -int mca_ptl_sm_component_close(void) -{ - int return_value=OMPI_SUCCESS; - - OBJ_DESTRUCT(&mca_ptl_sm_component.sm_lock); - OBJ_DESTRUCT(&mca_ptl_sm_component.sm_send_requests); - OBJ_DESTRUCT(&mca_ptl_sm_component.sm_first_frags); - OBJ_DESTRUCT(&mca_ptl_sm_component.sm_second_frags); - OBJ_DESTRUCT(&mca_ptl_sm_component.sm_pending_ack_lock); - OBJ_DESTRUCT(&mca_ptl_sm_component.sm_pending_ack); - - /* unmap the shared memory control structure */ - if(mca_ptl_sm_component.mmap_file != NULL) { - return_value = mca_common_sm_mmap_fini( mca_ptl_sm_component.mmap_file ); - if(-1 == return_value) { - return_value=OMPI_ERROR; - opal_output(0," munmap failed :: file - %s :: errno - %d \n", - mca_ptl_sm_component.mmap_file->map_addr, - errno); - goto CLEANUP; - } - - /* unlink file, so that it will be deleted when all references - * to it are gone - no error checking, since we want all procs - * to call this, so that in an abnormal termination scanario, - * this file will still get cleaned up */ - unlink(mca_ptl_sm_component.mmap_file->map_path); - } - -#if OMPI_ENABLE_PROGRESS_THREADS 
== 1 - /* close/cleanup fifo create for event notification */ - if(mca_ptl_sm_component.sm_fifo_fd > 0) { - /* write a done message down the pipe */ - unsigned char cmd = DONE; - if( write(mca_ptl_sm_component.sm_fifo_fd,&cmd,sizeof(cmd)) != - sizeof(cmd)){ - opal_output(0, "mca_ptl_sm_component_close: write fifo failed: errno=%d\n", - errno); - } - opal_thread_join(&mca_ptl_sm_component.sm_fifo_thread, NULL); - close(mca_ptl_sm_component.sm_fifo_fd); - unlink(mca_ptl_sm_component.sm_fifo_path); - } -#endif - - -CLEANUP: - - /* return */ - return return_value; -} - - -/* - * SM component initialization - */ -mca_ptl_base_module_t** mca_ptl_sm_component_init( - int *num_ptls, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - mca_ptl_base_module_t **ptls = NULL; - int i; - - *num_ptls = 0; - - /* lookup/create shared memory pool only when used */ - mca_ptl_sm_component.sm_mpool = NULL; - mca_ptl_sm_component.sm_mpool_base = NULL; - -#if OMPI_ENABLE_PROGRESS_THREADS == 1 - /* create a named pipe to receive events */ - sprintf(mca_ptl_sm_component.sm_fifo_path, - "%s/sm_fifo.%lu", orte_process_info.job_session_dir, - (unsigned long)orte_process_info.my_name->vpid); - if(mkfifo(mca_ptl_sm_component.sm_fifo_path, 0660) < 0) { - opal_output(0, "mca_ptl_sm_component_init: mkfifo failed with errno=%d\n",errno); - return NULL; - } - mca_ptl_sm_component.sm_fifo_fd = open(mca_ptl_sm_component.sm_fifo_path, O_RDWR); - if(mca_ptl_sm_component.sm_fifo_fd < 0) { - opal_output(0, "mca_ptl_sm_component_init: open(%s) failed with errno=%d\n", - mca_ptl_sm_component.sm_fifo_path, errno); - return NULL; - } - - OBJ_CONSTRUCT(&mca_ptl_sm_component.sm_fifo_thread, opal_thread_t); - mca_ptl_sm_component.sm_fifo_thread.t_run = (opal_thread_fn_t) mca_ptl_sm_component_event_thread; - opal_thread_start(&mca_ptl_sm_component.sm_fifo_thread); -#endif - - /* allocate the Shared Memory PTL */ - *num_ptls = 2; - ptls = malloc((*num_ptls)*sizeof(mca_ptl_base_module_t*)); - if (NULL 
== ptls) { - return NULL; - } - - /* get pointer to the ptls */ - ptls[0] = (mca_ptl_base_module_t *)(&(mca_ptl_sm[0])); - ptls[1] = (mca_ptl_base_module_t *)(&(mca_ptl_sm[1])); - - /* set scheduling parameters */ - for( i=0 ; i < 2 ; i++ ) { - mca_ptl_sm[i].super.ptl_cache_size=mca_ptl_sm_component.sm_first_frag_free_list_max; - mca_ptl_sm[i].super.ptl_cache_bytes=sizeof(mca_ptl_sm_send_request_t) - - sizeof(mca_ptl_base_send_request_t); - mca_ptl_sm[i].super.ptl_first_frag_size=mca_ptl_sm_component.first_fragment_size; - mca_ptl_sm[i].super.ptl_min_frag_size=mca_ptl_sm_component.max_fragment_size; - mca_ptl_sm[i].super.ptl_max_frag_size=mca_ptl_sm_component.max_fragment_size; - mca_ptl_sm[i].super.ptl_exclusivity=100; /* always use this ptl */ - mca_ptl_sm[i].super.ptl_latency=100; /* lowest latency */ - mca_ptl_sm[i].super.ptl_bandwidth=900; /* not really used now since - exclusivity is set to 100 */ - } - - /* initialize some PTL data */ - /* start with no SM procs */ - mca_ptl_sm_component.num_smp_procs = 0; - mca_ptl_sm_component.my_smp_rank = 0xFFFFFFFF; /* not defined */ - - /* set flag indicating ptl not inited */ - mca_ptl_sm[0].ptl_inited=false; - mca_ptl_sm[1].ptl_inited=false; - - return ptls; -} - -/* - * SM component control - */ - -int mca_ptl_sm_component_control(int param, void* value, size_t size) -{ - switch(param) { - case MCA_PTL_ENABLE: - break; - default: - break; - } - return OMPI_SUCCESS; -} - - -/* - * SM component progress. 
- */ - -#if OMPI_ENABLE_PROGRESS_THREADS == 1 -void mca_ptl_sm_component_event_thread(opal_object_t* thread) -{ - while(1) { - unsigned char cmd; - if(read(mca_ptl_sm_component.sm_fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { - /* error condition */ - return; - } - if( DONE == cmd ){ - /* return when done message received */ - return; - } - mca_ptl_sm_component_progress(0); - } -} -#endif - - -int mca_ptl_sm_component_progress(mca_ptl_tstamp_t tstamp) -{ - /* local variables */ - int my_local_smp_rank, proc; - unsigned int peer_local_smp_rank ; - mca_ptl_sm_frag_t *header_ptr; - ompi_fifo_t *send_fifo = NULL; - bool frag_matched; - mca_ptl_base_match_header_t *matching_header; - mca_ptl_base_send_request_t *base_send_req; - opal_list_item_t *item; - int return_status = 0; - - my_local_smp_rank=mca_ptl_sm_component.my_smp_rank; - - /* send progress is made by the PML */ - - /* - * receive progress - */ - - /* poll each fifo */ - - /* loop over fifo's - procs with same base shared memory - * virtual address as this process */ - for( proc=0 ; proc < mca_ptl_sm_component.num_smp_procs_same_base_addr - ; proc++ ) - { - peer_local_smp_rank= - mca_ptl_sm_component.list_smp_procs_same_base_addr[proc]; - - send_fifo=&(mca_ptl_sm_component.fifo - [peer_local_smp_rank][my_local_smp_rank]); - - /* if fifo is not yet setup - continue - not data has been sent*/ - if(OMPI_CB_FREE == send_fifo->tail){ - continue; - } - - /* aquire thread lock */ - if( opal_using_threads() ) { - opal_atomic_lock( &(send_fifo->tail_lock) ); - } - - /* get pointer - pass in offset to change queue pointer - * addressing from that of the sender */ - header_ptr = (mca_ptl_sm_frag_t *) - ompi_fifo_read_from_tail_same_base_addr( send_fifo ); - if( OMPI_CB_FREE == header_ptr ) { - /* release thread lock */ - if( opal_using_threads() ) { - opal_atomic_unlock(&(send_fifo->tail_lock)); - } - continue; - } - - /* release thread lock */ - if( opal_using_threads() ) { - 
opal_atomic_unlock(&(send_fifo->tail_lock)); - } - - /* figure out what type of message this is */ - return_status++; - switch - (header_ptr->super.frag_base.frag_header.hdr_common.hdr_type) - { - - case MCA_PTL_HDR_TYPE_MATCH: - /* set the owning ptl */ - header_ptr->super.frag_base.frag_owner= - (mca_ptl_base_module_t *) (&mca_ptl_sm[0]); - /* attempt match */ - matching_header= &(header_ptr->super.frag_base.frag_header.hdr_match); - frag_matched = header_ptr->super.frag_base.frag_owner->ptl_match( - header_ptr->super.frag_base.frag_owner, &(header_ptr->super), - matching_header ); - break; - - case MCA_PTL_HDR_TYPE_FRAG: - /* set the owning ptl */ - header_ptr->super.frag_base.frag_owner= - (mca_ptl_base_module_t *) (&mca_ptl_sm[0]); - /* second and beyond fragment - just need to deliver - * the data, and ack */ - mca_ptl_sm_matched_same_base_addr( - (mca_ptl_base_module_t *)&mca_ptl_sm, - (mca_ptl_base_recv_frag_t *)header_ptr); - break; - - case MCA_PTL_HDR_TYPE_ACK: - /* ack */ - /* update the send statistics */ - /* NOTE !!! : need to change the update stats, - * so that MPI_Wait/Test on the send can complete - * as soon as the data is copied intially into - * the shared memory buffers */ - - header_ptr->send_ptl->ptl_send_progress( - (mca_ptl_base_module_t *)&mca_ptl_sm, - header_ptr->send_req, - header_ptr->super.frag_base.frag_size); - - /* if this is not the first fragment, recycle - * resources. 
The first fragment is handled by - * the PML */ - if( 0 < header_ptr->send_offset ) { - OMPI_FREE_LIST_RETURN(&mca_ptl_sm_component.sm_second_frags, - (opal_list_item_t *)header_ptr); - } - break; - - default: - fprintf(stderr," Warnning: mca_ptl_sm_component_progress - unrecognized fragment type \n"); - fflush(stderr); - - } - - } /* end peer_local_smp_rank loop */ - - /* loop over fifo's - procs with different base shared memory - * virtual address as this process */ - for( proc=0 ; proc < mca_ptl_sm_component.num_smp_procs_different_base_addr - ; proc++ ) - { - peer_local_smp_rank= - mca_ptl_sm_component.list_smp_procs_different_base_addr[proc]; - - send_fifo=&(mca_ptl_sm_component.fifo - [peer_local_smp_rank][my_local_smp_rank]); - - /* if fifo is not yet setup - continue - not data has been sent*/ - if(OMPI_CB_FREE == send_fifo->tail){ - continue; - } - - /* aquire thread lock */ - if( opal_using_threads() ) { - opal_atomic_lock(&(send_fifo->tail_lock)); - } - - /* get pointer - pass in offset to change queue pointer - * addressing from that of the sender */ - header_ptr=(mca_ptl_sm_frag_t *)ompi_fifo_read_from_tail( send_fifo, - mca_ptl_sm_component.sm_offset[peer_local_smp_rank]); - if( OMPI_CB_FREE == header_ptr ) { - /* release thread lock */ - if( opal_using_threads() ) { - opal_atomic_unlock(&(send_fifo->tail_lock)); - } - continue; - } - - /* release thread lock */ - if( opal_using_threads() ) { - opal_atomic_unlock(&(send_fifo->tail_lock)); - } - - /* change the address from address relative to the shared - * memory address, to a true virtual address */ - header_ptr = (mca_ptl_sm_frag_t *)( (char *)header_ptr+ - mca_ptl_sm_component.sm_offset[peer_local_smp_rank]); - - - /* figure out what type of message this is */ - return_status++; - switch - (header_ptr->super.frag_base.frag_header.hdr_common.hdr_type) - { - - case MCA_PTL_HDR_TYPE_MATCH: - /* set the owning ptl */ - header_ptr->super.frag_base.frag_owner= - (mca_ptl_base_module_t *) 
(&mca_ptl_sm[1]); - /* attempt match */ - matching_header= &(header_ptr->super.frag_base.frag_header.hdr_match); - frag_matched = header_ptr->super.frag_base.frag_owner->ptl_match( - header_ptr->super.frag_base.frag_owner, &(header_ptr->super), - matching_header ); - break; - - case MCA_PTL_HDR_TYPE_FRAG: - /* set the owning ptl */ - header_ptr->super.frag_base.frag_owner= - (mca_ptl_base_module_t *) (&mca_ptl_sm[1]); - /* second and beyond fragment - just need to deliver - * the data, and ack */ - mca_ptl_sm_matched((mca_ptl_base_module_t *)&mca_ptl_sm, - (mca_ptl_base_recv_frag_t *)header_ptr); - break; - - case MCA_PTL_HDR_TYPE_ACK: - /* ack */ - /* update the send statistics */ - /* NOTE !!! : need to change the update stats, - * so that MPI_Wait/Test on the send can complete - * as soon as the data is copied intially into - * the shared memory buffers */ - base_send_req=header_ptr->super.frag_base.frag_header. - hdr_rndv.hdr_src_ptr.pval; - - header_ptr->send_ptl->ptl_send_progress( - (mca_ptl_base_module_t *)&mca_ptl_sm, - base_send_req, - header_ptr->super.frag_base.frag_size); - - /* if this is not the first fragment, recycle - * resources. 
The first fragment is handled by - * the PML */ - if( 0 < header_ptr->send_offset ) { - OMPI_FREE_LIST_RETURN(&mca_ptl_sm_component.sm_second_frags, - (opal_list_item_t *)header_ptr); - } - break; - - default: - fprintf(stderr," Warnning: mca_ptl_sm_component_progress - unrecognized fragment type \n"); - fflush(stderr); - - } - - } /* end peer_local_smp_rank loop */ - - - /* progress acks */ - if( !opal_list_is_empty(&(mca_ptl_sm_component.sm_pending_ack)) ) { - - OPAL_THREAD_LOCK(&(mca_ptl_sm_component.sm_pending_ack_lock)); - - /* remove ack from list - need to remove from list before - * sending the ack, so that when the ack is recieved, - * manipulated, and put on a new list, it is not also - * on a different list */ - item = opal_list_remove_first(&(mca_ptl_sm_component.sm_pending_ack)); - while ( item != opal_list_get_end(&(mca_ptl_sm_component.sm_pending_ack)) ) { - int rc; - /* get fragment pointer */ - header_ptr = (mca_ptl_sm_frag_t *)item; - - /* try and send an ack - no need to check and see if a send - * queue has been allocated, since entries are put here only - * if the queue was previously full */ - - /* fragment already marked as an ack */ - - rc=ompi_fifo_write_to_head_same_base_addr(header_ptr, - send_fifo, mca_ptl_sm_component.sm_mpool); - - /* if ack failed, break */ - if( 0 > rc ) { - /* put the descriptor back on the list */ - opal_list_prepend(&(mca_ptl_sm_component.sm_pending_ack),item); - break; - } - MCA_PTL_SM_SIGNAL_PEER(mca_ptl_sm_component.sm_peers[header_ptr->queue_index]); - - /* get next fragment to ack */ - item = opal_list_remove_first(&(mca_ptl_sm_component.sm_pending_ack)); - - } - - OPAL_THREAD_UNLOCK(&(mca_ptl_sm_component.sm_pending_ack_lock)); - } - return return_status; -} diff --git a/ompi/mca/ptl/sm/ptl_sm_frag.c b/ompi/mca/ptl/sm/ptl_sm_frag.c deleted file mode 100644 index 8c0a77ce4c..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_frag.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana 
University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#include "ptl_sm.h" -#include "ptl_sm_frag.h" -#include "ptl_sm_address.h" - - -static void mca_ptl_sm_first_frag_construct(mca_ptl_sm_frag_t* frag); -static void mca_ptl_sm_first_frag_destruct(mca_ptl_sm_frag_t* frag); -static void mca_ptl_sm_second_frag_construct(mca_ptl_sm_frag_t* frag); -static void mca_ptl_sm_second_frag_destruct(mca_ptl_sm_frag_t* frag); - -OBJ_CLASS_INSTANCE( - mca_ptl_sm_frag_t, - mca_ptl_base_recv_frag_t, - mca_ptl_sm_first_frag_construct, - mca_ptl_sm_first_frag_destruct -); - -OBJ_CLASS_INSTANCE( - mca_ptl_sm_second_frag_t, - mca_ptl_base_recv_frag_t, - mca_ptl_sm_second_frag_construct, - mca_ptl_sm_second_frag_destruct -); - - -/* - * shared memory recv fragment constructor - */ - -static void mca_ptl_sm_first_frag_construct(mca_ptl_sm_frag_t* frag) -{ - char *ptr; - - /* set the buffer length */ - frag->buff_length=(size_t)mca_ptl_sm_component.first_fragment_size; - - /* set local rank */ - frag->queue_index=mca_ptl_sm_component.my_smp_rank; - - /* set pointer to the sending ptl */ - frag->send_ptl=(mca_ptl_base_module_t *)(&mca_ptl_sm); - - /* set buffer pointer */ - ptr=((char *)frag)+sizeof(mca_ptl_sm_frag_t)+ - mca_ptl_sm_component.fragment_alignment; - /* align */ - ptr=ptr-(((size_t)ptr)%(mca_ptl_sm_component.fragment_alignment)); - frag->buff=ptr; -} - - -/* - * shared memory recv fragment destructor - */ - 
-static void mca_ptl_sm_first_frag_destruct(mca_ptl_sm_frag_t* frag) -{ -} - -/* - * shared memory second and above fragments - */ - -static void mca_ptl_sm_second_frag_construct(mca_ptl_sm_frag_t* frag) -{ - char *ptr; - - /* set the buffer length */ - frag->buff_length=(size_t)mca_ptl_sm_component.max_fragment_size; - - /* set local rank */ - frag->queue_index=mca_ptl_sm_component.my_smp_rank; - - /* set pointer to the sending ptl */ - frag->send_ptl=(mca_ptl_base_module_t *)(&mca_ptl_sm); - - /* set buffer pointer */ - ptr=((char *)frag)+sizeof(mca_ptl_sm_frag_t)+ - mca_ptl_sm_component.fragment_alignment; - /* align */ - ptr=ptr-(((size_t)ptr)%(mca_ptl_sm_component.fragment_alignment)); - frag->buff=ptr; -} - - -/* - * shared memory second and above fragments - */ - -static void mca_ptl_sm_second_frag_destruct(mca_ptl_sm_frag_t* frag) -{ -} - diff --git a/ompi/mca/ptl/sm/ptl_sm_frag.h b/ompi/mca/ptl/sm/ptl_sm_frag.h deleted file mode 100644 index 577bda0bfa..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_frag.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PTL_SM_RECV_FRAG_H -#define MCA_PTL_SM_RECV_FRAG_H - -#include -#include -#include "opal/sys/atomic.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ptl_sm.h" - - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -OBJ_CLASS_DECLARATION(mca_ptl_sm_frag_t); - -OBJ_CLASS_DECLARATION(mca_ptl_sm_second_frag_t); - - -/** - * shared memory received fragment derived type - because of - * the way lists are initialized in Open MPI, this is good - * only for the first fragment. - */ -struct mca_ptl_sm_frag_t { - mca_ptl_base_recv_frag_t super; /**< base receive fragment descriptor */ - size_t buff_length; /**< size of buffer */ - int queue_index; /**< local process index, cached for fast - acking */ - struct mca_ptl_base_module_t* send_ptl; /**< PTL that is selected for first fragment */ - struct mca_ptl_base_send_request_t* send_req; - size_t send_offset; - void *buff; /**< pointer to buffer */ -}; -typedef struct mca_ptl_sm_frag_t mca_ptl_sm_frag_t; - -/** - * shared memory received fragment derived type - because of - * the way lists are initialized in Open MPI, this is good - * only for the second and beyond fragments. - */ -typedef struct mca_ptl_sm_frag_t mca_ptl_sm_second_frag_t; -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif - diff --git a/ompi/mca/ptl/sm/ptl_sm_peer.h b/ompi/mca/ptl/sm/ptl_sm_peer.h deleted file mode 100644 index 3438980e01..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_peer.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_SM_PEER_H -#define MCA_PTL_SM_PEER_H - -#if OMPI_ENABLE_PROGRESS_THREADS == 1 -#include "opal/event/event.h" -#endif - -/** - * An abstraction that represents a connection to a peer process. - * An instance of mca_ptl_base_peer_t is associated w/ each process - * and PTL pair at startup. - */ - -struct mca_ptl_base_peer_t { - int my_smp_rank; /**< My SMP process rank. Used for accessing - * SMP specfic data structures. */ - int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing - * SMP specfic data structures. */ -#if OMPI_ENABLE_PROGRESS_THREADS == 1 - int fifo_fd; /**< pipe/fifo used to signal peer that data is queued */ -#endif -}; - -#endif - diff --git a/ompi/mca/ptl/sm/ptl_sm_recvfrag.c b/ompi/mca/ptl/sm/ptl_sm_recvfrag.c deleted file mode 100644 index 64555b3633..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_recvfrag.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include -#include "ptl_sm.h" -#include "ptl_sm_recvfrag.h" - - -static void mca_ptl_sm_recv_frag_construct(mca_ptl_sm_recv_frag_t* frag); -static void mca_ptl_sm_recv_frag_destruct(mca_ptl_sm_recv_frag_t* frag); - -OBJ_CLASS_INSTANCE( - mca_ptl_sm_recv_frag_t, - mca_ptl_base_recv_frag_t, - mca_ptl_sm_recv_frag_construct, - mca_ptl_sm_recv_frag_destruct -); - - -/* - * shared memory recv fragment constructor - */ - -static void mca_ptl_sm_recv_frag_construct(mca_ptl_sm_recv_frag_t* frag) -{ -} - - -/* - * shared memory recv fragment destructor - */ - -static void mca_ptl_sm_recv_frag_destruct(mca_ptl_sm_recv_frag_t* frag) -{ -} - diff --git a/ompi/mca/ptl/sm/ptl_sm_recvfrag.h b/ompi/mca/ptl/sm/ptl_sm_recvfrag.h deleted file mode 100644 index b0a27a910c..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_recvfrag.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PTL_SM_RECV_FRAG_H -#define MCA_PTL_SM_RECV_FRAG_H - -#include -#include -#include "opal/sys/atomic.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ptl_sm.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -OBJ_CLASS_DECLARATION(mca_ptl_sm_recv_frag_t); - - -/** - * shared memory received fragment derived type. 
- */ -struct mca_ptl_sm_recv_frag_t { - mca_ptl_base_recv_frag_t super; /**< base receive fragment descriptor */ -}; -typedef struct mca_ptl_sm_recv_frag_t mca_ptl_sm_recv_frag_t; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/ptl/sm/ptl_sm_send.c b/ompi/mca/ptl/sm/ptl_sm_send.c deleted file mode 100644 index 77872f8189..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_send.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include -#include -#include - -#include "opal/util/output.h" -#include "opal/util/if.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/mca/pml/base/pml_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ompi/mca/common/sm/common_sm_mmap.h" -#include "ptl_sm.h" -#include "orte/util/sys_info.h" -#include "ompi/mca/ptl/sm/ptl_sm_peer.h" -#include "ompi/mca/common/sm/common_sm_mmap.h" -#include "orte/util/proc_info.h" -#include "opal/util/printf.h" -#include "ompi/mca/ptl/sm/ptl_sm_sendreq.h" -#include "ompi/class/ompi_fifo.h" -#include "ompi/class/ompi_free_list.h" -#include "opal/threads/mutex.h" -#include "ompi/datatype/datatype.h" - - -/* - * If we're "recompiling" (see the Makefile.am for an 
explanation), - * change the function names. - */ - -#ifdef SM_COMMON_BASE_ADDR -#define mca_ptl_sm_matched mca_ptl_sm_matched_same_base_addr -#endif - -/* - * A posted receive has been matched: - * - deliver data to user buffers - * - update receive request data - * - ack - * - * fragment lists are NOT manipulated. - */ - -void mca_ptl_sm_matched( - mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag) -{ - mca_ptl_base_recv_request_t* recv_desc; - mca_ptl_sm_frag_t *sm_frag_desc; - struct iovec iov; - ompi_convertor_t frag_convertor; - int free_after,my_local_smp_rank,peer_local_smp_rank, return_status; - unsigned int iov_count; - size_t max_data; - ompi_fifo_t *send_fifo; - - /* copy data from shared memory buffer to user buffer */ - /* get pointer to the matched receive descriptor */ - recv_desc = frag->frag_request; - sm_frag_desc = (mca_ptl_sm_frag_t *)frag; - - my_local_smp_rank=mca_ptl_sm_component.my_smp_rank; - peer_local_smp_rank=sm_frag_desc->queue_index; - - /* copy, only if there is data to copy */ - max_data=0; - if( 0 < sm_frag_desc->super.frag_base.frag_size ) { - - /* - * Initialize convertor and use it to unpack data - */ - ompi_convertor_clone_with_position( &(recv_desc->req_recv.req_convertor), &frag_convertor, - 1, &(sm_frag_desc->send_offset) ); - - /* convert address from sender's address space to my virtual - * address space */ -#ifdef SM_COMMON_BASE_ADDR - iov.iov_base = (void *)( (char *)sm_frag_desc->buff); -#else - iov.iov_base = (void *)( (char *)sm_frag_desc->buff+ - mca_ptl_sm_component.sm_offset[peer_local_smp_rank]); -#endif - iov.iov_len = sm_frag_desc->super.frag_base.frag_size; - iov_count = 1; - max_data = iov.iov_len; - ompi_convertor_unpack( &frag_convertor, &iov, &iov_count, &max_data, &free_after ); - } - - /* update receive request information */ - frag->frag_base.frag_owner->ptl_recv_progress( - ptl, - recv_desc, - sm_frag_desc->super.frag_base.frag_size, - max_data); - - /* ack - ack recycles shared memory 
fragment resources, so - * don't agragate */ - - send_fifo=&(mca_ptl_sm_component.fifo - [my_local_smp_rank][peer_local_smp_rank]); - - /* lock as multiple processes can attempt to init the head */ - if(opal_using_threads()) - opal_atomic_lock(&send_fifo->head_lock); - - /* check to see if fifo is allocated */ - if(OMPI_CB_FREE == send_fifo->head) { - /* no queues have been allocated - allocate now */ - return_status=ompi_fifo_init_same_base_addr( - mca_ptl_sm_component.size_of_cb_queue, - mca_ptl_sm_component.cb_lazy_free_freq, - /* at this stage we are not doing anything with memory - * locality */ - 0,0,0, - send_fifo, mca_ptl_sm_component.sm_mpool); - if( return_status != OMPI_SUCCESS ) { - if(opal_using_threads()) - opal_atomic_unlock(&send_fifo->head_lock); - return; - } - } - - /* change address to be relative to offset from base of shared - * memory segment - */ - - /* set the fragment type to be an ack */ - sm_frag_desc->super.frag_base.frag_header.hdr_common.hdr_type= - MCA_PTL_HDR_TYPE_ACK; - return_status=ompi_fifo_write_to_head_same_base_addr(sm_frag_desc, - send_fifo, mca_ptl_sm_component.sm_mpool); - - if(opal_using_threads()) - opal_atomic_unlock(&send_fifo->head_lock); - - /* if can't ack, put on list for later delivery */ - if( 0 > return_status ) { - OPAL_THREAD_LOCK(&(mca_ptl_sm_component.sm_pending_ack_lock)); - opal_list_append(&(mca_ptl_sm_component.sm_pending_ack), - (opal_list_item_t *)sm_frag_desc); - OPAL_THREAD_UNLOCK(&(mca_ptl_sm_component.sm_pending_ack_lock)); - } else { - MCA_PTL_SM_SIGNAL_PEER(mca_ptl_sm_component.sm_peers[peer_local_smp_rank]); - } - - - /* return */ - return; -} diff --git a/ompi/mca/ptl/sm/ptl_sm_sendfrag.c b/ompi/mca/ptl/sm/ptl_sm_sendfrag.c deleted file mode 100644 index 904bbaffa4..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_sendfrag.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#include "ompi/types.h" -#include "ompi/datatype/datatype.h" -#include "ptl_sm.h" -#include "ptl_sm_sendfrag.h" - - -static void mca_ptl_sm_send_frag_construct(mca_ptl_sm_send_frag_t* frag); -static void mca_ptl_sm_send_frag_destruct(mca_ptl_sm_send_frag_t* frag); - - -OBJ_CLASS_INSTANCE( - mca_ptl_sm_send_frag_t, - mca_ptl_base_send_frag_t, - mca_ptl_sm_send_frag_construct, - mca_ptl_sm_send_frag_destruct); - - -/* - * Placeholders for send fragment constructor/destructors. - */ - -static void mca_ptl_sm_send_frag_construct(mca_ptl_sm_send_frag_t* frag) -{ -} - - -static void mca_ptl_sm_send_frag_destruct(mca_ptl_sm_send_frag_t* frag) -{ -} - diff --git a/ompi/mca/ptl/sm/ptl_sm_sendfrag.h b/ompi/mca/ptl/sm/ptl_sm_sendfrag.h deleted file mode 100644 index b060a50518..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_sendfrag.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_SM_SEND_FRAG_H -#define MCA_PTL_SM_SEND_FRAG_H - -#include -#include "ompi_config.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ptl_sm.h" - - -OBJ_CLASS_DECLARATION(mca_ptl_sm_send_frag_t); - - -/** - * shared memory send fragment derived type. - */ -struct mca_ptl_sm_send_frag_t { - mca_ptl_base_send_frag_t super; /**< base send fragment descriptor */ -}; -typedef struct mca_ptl_sm_send_frag_t mca_ptl_sm_send_frag_t; - - -#define MCA_PTL_SM_SEND_FRAG_ALLOC(item, rc) \ - OMPI_FREE_LIST_GET(&mca_ptl_sm_module.sm_send_frags, item, rc); - - -/** - * Initialize a fragment descriptor. - * - * frag (IN) Fragment - * peer (IN) PTL peer addressing information - * request (IN) Send request - * offset (IN) Current offset into packed buffer - * size (IN/OUT) Requested size / actual size returned - * flags (IN) - */ - -int mca_ptl_sm_send_frag_init( - mca_ptl_sm_send_frag_t*, - struct mca_ptl_base_peer_t*, - struct mca_ptl_base_send_request_t*, - size_t offset, - size_t* size, - int flags); - - -#endif - diff --git a/ompi/mca/ptl/sm/ptl_sm_sendreq.c b/ompi/mca/ptl/sm/ptl_sm_sendreq.c deleted file mode 100644 index 718badee86..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_sendreq.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif /* HAVE_SYS_TYPES_H */ -#include "ompi/types.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ptl_sm.h" -#include "ptl_sm_sendreq.h" -#include "ptl_sm_address.h" - - -static void mca_ptl_sm_send_request_construct(mca_ptl_sm_send_request_t*); -static void mca_ptl_sm_send_request_destruct(mca_ptl_sm_send_request_t*); - - -OBJ_CLASS_INSTANCE( - mca_ptl_sm_send_request_t, - mca_ptl_base_send_request_t, - mca_ptl_sm_send_request_construct, - mca_ptl_sm_send_request_destruct -); - - -/* constructor for the shared memory send descriptor */ -void mca_ptl_sm_send_request_construct(mca_ptl_sm_send_request_t* request) -{ - OBJ_CONSTRUCT(&request->req_frag, mca_ptl_sm_frag_t); -} - - -/* desnstructor for the shared memory send descriptor */ -void mca_ptl_sm_send_request_destruct(mca_ptl_sm_send_request_t* request) -{ - OBJ_DESTRUCT(&request->req_frag); -} - -/* initializtion function to be called when a new shared - * memory send request is initialized. 
This will attempt - * to allocate fragment descriptor and payload memory - */ -int mca_ptl_sm_send_request_init(struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* request) -{ - - mca_ptl_sm_send_request_t *sm_request; - mca_ptl_sm_t *ptl_sm; - - int return_value=OMPI_SUCCESS; - - /* cast to shared memory send descriptor */ - sm_request=(mca_ptl_sm_send_request_t *)request; - - /* cast to shared memory ptl */ - ptl_sm=(mca_ptl_sm_t *)ptl; - - /* get first fragment descritor from free list - the pointer - * returned is valid only in this process, since different - * processes may have different base addresses - */ - sm_request->req_frag=(mca_ptl_sm_frag_t *)opal_list_get_first( - (void *)&(mca_ptl_sm_component.sm_first_frags)); - if(NULL == sm_request->req_frag){ - return_value=OMPI_ERR_OUT_OF_RESOURCE; - } - - return return_value; -} - diff --git a/ompi/mca/ptl/sm/ptl_sm_sendreq.h b/ompi/mca/ptl/sm/ptl_sm_sendreq.h deleted file mode 100644 index 33c8629e11..0000000000 --- a/ompi/mca/ptl/sm/ptl_sm_sendreq.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PTL_SM_SEND_REQUEST_H -#define MCA_PTL_SM_SEND_REQUEST_H - -#include -#include "ompi_config.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ptl_sm_frag.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -OBJ_CLASS_DECLARATION(mca_ptl_sm_send_request_t); - - -/** - * Shared Memory (SM) send request derived type. The send request contains - * both base send request, and a pointer to the first fragment descriptor. - */ -struct mca_ptl_sm_send_request_t { - - /* base send descriptor */ - mca_ptl_base_send_request_t super; - - /* pointer to first fragment descriptor */ - mca_ptl_sm_frag_t *req_frag; -}; -typedef struct mca_ptl_sm_send_request_t mca_ptl_sm_send_request_t; - -/** - * initializtion function to be called when a new shared - * memory send request is initialized. - */ -int mca_ptl_sm_send_request_init(struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* request); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/ptl/tcp/Makefile.am b/ompi/mca/ptl/tcp/Makefile.am deleted file mode 100644 index 13a72313dd..0000000000 --- a/ompi/mca/ptl/tcp/Makefile.am +++ /dev/null @@ -1,67 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - - - -sources = \ - ptl_tcp.c \ - ptl_tcp.h \ - ptl_tcp_addr.h \ - ptl_tcp_component.c \ - ptl_tcp_peer.c \ - ptl_tcp_peer.h \ - ptl_tcp_proc.c \ - ptl_tcp_proc.h \ - ptl_tcp_recvfrag.c \ - ptl_tcp_recvfrag.h \ - ptl_tcp_sendfrag.c \ - ptl_tcp_sendfrag.h \ - ptl_tcp_sendreq.c \ - ptl_tcp_sendreq.h - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if OMPI_BUILD_ptl_tcp_DSO -lib = -lib_sources = -component = mca_ptl_tcp.la -component_sources = $(sources) -else -lib = libmca_ptl_tcp.la -lib_sources = $(sources) -component = -component_sources = -endif - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component) -mca_ptl_tcp_la_SOURCES = $(component_sources) -mca_ptl_tcp_la_LDFLAGS = -module -avoid-version -mca_ptl_tcp_la_LIBADD = \ - $(top_ompi_builddir)/ompi/libmpi.la \ - $(top_ompi_builddir)/orte/liborte.la \ - $(top_ompi_builddir)/opal/libopal.la - -noinst_LTLIBRARIES = $(lib) -libmca_ptl_tcp_la_SOURCES = $(lib_sources) -libmca_ptl_tcp_la_LDFLAGS = -module -avoid-version - diff --git a/ompi/mca/ptl/tcp/configure.m4 b/ompi/mca/ptl/tcp/configure.m4 deleted file mode 100644 index 54e7fb762f..0000000000 --- a/ompi/mca/ptl/tcp/configure.m4 +++ /dev/null @@ -1,31 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_ptl_tcp_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ptl_tcp_CONFIG],[ - # check for sockaddr_in (a good sign we have TCP) - AC_CHECK_TYPES([struct sockaddr_in], - [$1], - [$2], - [AC_INCLUDES_DEFAULT -#ifdef HAVE_NETINET_IN_H -#include -#endif]) -])dnl diff --git a/ompi/mca/ptl/tcp/configure.params b/ompi/mca/ptl/tcp/configure.params deleted file mode 100644 index d85574b1f9..0000000000 --- a/ompi/mca/ptl/tcp/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=ptl_tcp.c -PARAM_CONFIG_HEADER_FILE="tcp_config.h" -PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/ptl/tcp/ptl_tcp.c b/ompi/mca/ptl/tcp/ptl_tcp.c deleted file mode 100644 index e28205b91f..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include "ompi/class/ompi_bitmap.h" -#include "opal/util/output.h" -#include "opal/util/if.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/ptl/base/ptl_base_header.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ompi/mca/ptl/base/ptl_base_recvreq.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ptl_tcp.h" -#include "ptl_tcp_addr.h" -#include "ptl_tcp_peer.h" -#include "ptl_tcp_proc.h" -#include "ptl_tcp_sendreq.h" -#include "ptl_tcp_recvfrag.h" - - -mca_ptl_tcp_module_t mca_ptl_tcp_module = { - { - &mca_ptl_tcp_component.super, - 16, /* max size of request cache */ - sizeof(mca_ptl_tcp_send_request_t) - sizeof(mca_ptl_base_send_request_t), /* bytes required by ptl for a request */ - 0, /* max size of first fragment */ - 0, /* min fragment size */ - 0, /* max fragment size */ - 0, /* exclusivity */ - 0, /* latency */ - 0, /* bandwidth */ - MCA_PTL_PUT, /* ptl flags */ - mca_ptl_tcp_add_procs, - mca_ptl_tcp_del_procs, - mca_ptl_tcp_finalize, - mca_ptl_tcp_send, - mca_ptl_tcp_send, - NULL, - mca_ptl_tcp_matched, - mca_ptl_tcp_request_init, - mca_ptl_tcp_request_fini, - NULL, - NULL, - NULL - } -}; - -/* - * For each peer process: - * (1) Lookup/create a parallel structure that represents the TCP state of the peer process. - * (2) Use the mca_pml_base_modex_recv function determine the endpoints exported by the peer. - * (3) Create a data structure to represent the state of the connection to the peer. - * (4) Select an address exported by the peer to use for this connection. 
- */ - -int mca_ptl_tcp_add_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **ompi_procs, - struct mca_ptl_base_peer_t** peers, - ompi_bitmap_t* reachable) -{ - size_t i; - mca_ptl_tcp_module_t *ptl_tcp = (mca_ptl_tcp_module_t*)ptl; - struct ompi_proc_t * proc_self = ompi_proc_local(); - - for(i=0; iproc_lock); - if(ptl_proc->proc_addr_count == ptl_proc->proc_peer_count) { - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - continue; - } - - /* The ptl_proc datastructure is shared by all TCP PTL instances that are trying - * to reach this destination. Cache the peer instance on the ptl_proc. - */ - ptl_peer = OBJ_NEW(mca_ptl_tcp_peer_t); - if(NULL == ptl_peer) { - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - ptl_peer->peer_ptl = (mca_ptl_tcp_module_t*)ptl; - rc = mca_ptl_tcp_proc_insert(ptl_proc, ptl_peer); - if(rc != OMPI_SUCCESS) { - OBJ_RELEASE(ptl_peer); - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - continue; /* UNREACHABLE it's not a problem, others PTL can be used to send the data */ - } - /* do we need to convert to/from network byte order */ - if(ompi_proc->proc_arch != proc_self->proc_arch) - ptl_peer->peer_nbo = true; - - ompi_bitmap_set_bit(reachable, i); - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - peers[i] = ptl_peer; - opal_list_append(&ptl_tcp->ptl_peers, (opal_list_item_t*)ptl_peer); - /* we increase the count of MPI users of the event library - once per peer, so that we are used until we aren't - connected to a peer */ - opal_progress_event_increment(); - } - return OMPI_SUCCESS; -} - -/* - * Cleanup the peer datastructure(s) - and remove the cooresponding - * tcp process data structure(s). 
- */ - -int mca_ptl_tcp_del_procs(struct mca_ptl_base_module_t* ptl, size_t nprocs, struct ompi_proc_t **procs, struct mca_ptl_base_peer_t ** peers) -{ - size_t i; - mca_ptl_tcp_module_t *ptl_tcp = (mca_ptl_tcp_module_t*)ptl; - - for(i=0; iptl_peers, (opal_list_item_t*)peers[i]); - OBJ_RELEASE(peers[i]); - opal_progress_event_decrement(); - } - return OMPI_SUCCESS; -} - -/* - * Cleanup all peer data structures associated w/ the ptl. - */ - -int mca_ptl_tcp_finalize(struct mca_ptl_base_module_t* ptl) -{ - opal_list_item_t* item; - mca_ptl_tcp_module_t *ptl_tcp = (mca_ptl_tcp_module_t*)ptl; - for( item = opal_list_remove_first(&ptl_tcp->ptl_peers); - item != NULL; - item = opal_list_remove_first(&ptl_tcp->ptl_peers)) { - mca_ptl_tcp_peer_t *peer = (mca_ptl_tcp_peer_t*)item; - OBJ_RELEASE(peer); - opal_progress_event_decrement(); - } - free(ptl); - return OMPI_SUCCESS; -} - -/* - * Initialize a request for use by the ptl. Use the extra memory allocated - * along w/ the ptl to cache the first fragment control information. - */ - -int mca_ptl_tcp_request_init(struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_send_request_t* request) -{ - OBJ_CONSTRUCT(request+1, mca_ptl_tcp_send_frag_t); - return OMPI_SUCCESS; -} - - -/* - * Cleanup any resources cached along w/ the request. 
- */ - -void mca_ptl_tcp_request_fini(struct mca_ptl_base_module_t* ptl, struct mca_ptl_base_send_request_t* request) -{ - OBJ_DESTRUCT(request+1); -} - - -void mca_ptl_tcp_recv_frag_return(struct mca_ptl_base_module_t* ptl, struct mca_ptl_tcp_recv_frag_t* frag) -{ - if(frag->frag_recv.frag_is_buffered) { - free(frag->frag_recv.frag_base.frag_addr); - frag->frag_recv.frag_is_buffered = false; - frag->frag_recv.frag_base.frag_addr = NULL; - } - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_recv_frags, (opal_list_item_t*)frag); -} - - -void mca_ptl_tcp_send_frag_return(struct mca_ptl_base_module_t* ptl, struct mca_ptl_tcp_send_frag_t* frag) -{ - if(opal_list_get_size(&mca_ptl_tcp_component.tcp_pending_acks)) { - mca_ptl_tcp_recv_frag_t* pending; - OPAL_THREAD_LOCK(&mca_ptl_tcp_component.tcp_lock); - pending = (mca_ptl_tcp_recv_frag_t*)opal_list_remove_first(&mca_ptl_tcp_component.tcp_pending_acks); - if(NULL == pending) { - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_send_frags, (opal_list_item_t*)frag); - return; - } - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); - mca_ptl_tcp_send_frag_init_ack(frag, ptl, pending->frag_recv.frag_base.frag_peer, pending); - if(frag->frag_send.frag_base.frag_peer->peer_nbo) { - MCA_PTL_BASE_ACK_HDR_HTON(frag->frag_send.frag_base.frag_header.hdr_ack); - } - mca_ptl_tcp_peer_send(pending->frag_recv.frag_base.frag_peer, frag, 0); - mca_ptl_tcp_recv_frag_return(ptl, pending); - } else { - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_send_frags, (opal_list_item_t*)frag); - } -} - -/* - * Initiate a send. If this is the first fragment, use the fragment - * descriptor allocated with the send requests, otherwise obtain - * one from the free list. Initialize the fragment and foward - * on to the peer. 
- */ - -int mca_ptl_tcp_send( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t* sendreq, - size_t offset, - size_t size, - int flags) -{ - mca_ptl_tcp_send_frag_t* sendfrag; - int rc; - if (offset == 0 && sendreq->req_cached) { - sendfrag = &((mca_ptl_tcp_send_request_t*)sendreq)->req_frag; - } else { - opal_list_item_t* item; - OMPI_FREE_LIST_GET(&mca_ptl_tcp_component.tcp_send_frags, item, rc); - if(NULL == (sendfrag = (mca_ptl_tcp_send_frag_t*)item)) - return rc; - } - rc = mca_ptl_tcp_send_frag_init(sendfrag, ptl_peer, sendreq, offset, &size, flags); - if(rc != OMPI_SUCCESS) - return rc; - /* must update the offset after actual fragment size is determined -- and very important -- - * before attempting to send the fragment - */ - mca_ptl_base_send_request_offset(sendreq, size); - return mca_ptl_tcp_peer_send(ptl_peer, sendfrag, offset); -} - - -/* - * A posted receive has been matched - if required send an - * ack back to the peer and process the fragment. - */ - -void mca_ptl_tcp_matched( - mca_ptl_base_module_t* ptl, - mca_ptl_base_recv_frag_t* frag) -{ - /* send ack back to peer? 
*/ - mca_ptl_base_header_t* header = &frag->frag_base.frag_header; - if(header->hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK) { - int rc; - mca_ptl_tcp_send_frag_t* ack; - mca_ptl_tcp_recv_frag_t* recv_frag = (mca_ptl_tcp_recv_frag_t*)frag; - opal_list_item_t* item; - MCA_PTL_TCP_SEND_FRAG_ALLOC(item, rc); - ack = (mca_ptl_tcp_send_frag_t*)item; - - if(NULL == ack) { - OPAL_THREAD_LOCK(&mca_ptl_tcp_component.tcp_lock); - recv_frag->frag_ack_pending = true; - opal_list_append(&mca_ptl_tcp_component.tcp_pending_acks, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); - } else { - mca_ptl_tcp_send_frag_init_ack(ack, ptl, recv_frag->frag_recv.frag_base.frag_peer, recv_frag); - if(ack->frag_send.frag_base.frag_peer->peer_nbo) { - MCA_PTL_BASE_ACK_HDR_HTON(ack->frag_send.frag_base.frag_header.hdr_ack); - } - mca_ptl_tcp_peer_send(ack->frag_send.frag_base.frag_peer, ack, 0); - } - } - - /* process fragment if complete */ - mca_ptl_tcp_recv_frag_progress((mca_ptl_tcp_recv_frag_t*)frag); -} - - diff --git a/ompi/mca/ptl/tcp/ptl_tcp.h b/ompi/mca/ptl/tcp/ptl_tcp.h deleted file mode 100644 index 4098b747c1..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_TCP_H -#define MCA_PTL_TCP_H - -#include "ompi_config.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include "ompi/class/ompi_free_list.h" -#include "opal/event/event.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" - -#define MCA_PTL_TCP_STATISTICS 0 -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * TCP PTL module. - */ -struct mca_ptl_tcp_component_t { - mca_ptl_base_component_1_0_0_t super; /**< base PTL component */ - struct mca_ptl_tcp_module_t** tcp_ptl_modules; /**< array of available PTL moduless */ - size_t tcp_num_ptl_modules; /**< number of ptls actually used */ - size_t tcp_max_ptl_modules; /**< maximum number of ptls - available kernel ifs */ - int tcp_listen_sd; /**< listen socket for incoming connection requests */ - unsigned short tcp_listen_port; /**< listen port */ - char* tcp_if_include; /**< comma seperated list of interface to include */ - char* tcp_if_exclude; /**< comma seperated list of interface to exclude */ - int tcp_free_list_num; /**< initial size of free lists */ - int tcp_free_list_max; /**< maximum size of free lists */ - int tcp_free_list_inc; /**< number of elements to alloc when growing free lists */ - int tcp_sndbuf; /**< socket sndbuf size */ - int tcp_rcvbuf; /**< socket rcvbuf size */ - size_t tcp_frag_size; /**< buffer limit for the TCP PTL */ - ompi_free_list_t tcp_send_frags; /**< free list of tcp send fragments */ - ompi_free_list_t tcp_recv_frags; /**< free list of tcp recv fragments */ - opal_hash_table_t tcp_procs; /**< hash table of tcp proc structures */ - opal_list_t tcp_pending_acks; /**< list of pending acks - retry as sends complete */ - opal_list_t tcp_events; /**< list of pending events */ - struct mca_ptl_tcp_proc_t* tcp_local; /**< the tcp proc instance 
corresponding to the local process */ - opal_event_t tcp_send_event; /**< event structure for sends */ - opal_event_t tcp_recv_event; /**< event structure for recvs */ - opal_mutex_t tcp_lock; /**< lock for accessing module state */ -}; -typedef struct mca_ptl_tcp_component_t mca_ptl_tcp_component_t; -struct mca_ptl_tcp_recv_frag_t; -struct mca_ptl_tcp_send_frag_t; - -OMPI_COMP_EXPORT extern mca_ptl_tcp_component_t mca_ptl_tcp_component; - -/** - * Register TCP module parameters with the MCA framework - */ -extern int mca_ptl_tcp_component_open(void); - -/** - * Any final cleanup before being unloaded. - */ -extern int mca_ptl_tcp_component_close(void); - -/** - * TCP module initialization. - * - * @param num_ptls (OUT) Number of PTLs returned in PTL array. - * @param allow_multi_user_threads (OUT) Flag indicating wether PTL supports user threads (TRUE) - * @param have_hidden_threads (OUT) Flag indicating wether PTL uses threads (TRUE) - * - * (1) read interface list from kernel and compare against module parameters - * then create a PTL instance for selected interfaces - * (2) setup TCP listen socket for incoming connection attempts - * (3) publish PTL addressing info - * - */ -extern mca_ptl_base_module_t** mca_ptl_tcp_component_init( - int *num_ptls, - bool enable_progress_threads, - bool enable_mpi_threads -); - -/** - * TCP module control. - */ -extern int mca_ptl_tcp_component_control( - int param, - void* value, - size_t size -); - -/** - * TCP module progress. 
- */ -extern int mca_ptl_tcp_component_progress( - mca_ptl_tstamp_t tstamp -); - -/** - * TCP PTL Interface - */ -struct mca_ptl_tcp_module_t { - mca_ptl_base_module_t super; /**< base PTL module interface */ - int ptl_ifindex; /**< PTL interface index */ - struct sockaddr_in ptl_ifaddr; /**< PTL interface address */ - struct sockaddr_in ptl_ifmask; /**< PTL interface netmask */ - opal_list_t ptl_peers; /**< List of all peers for this PTL */ -#if MCA_PTL_TCP_STATISTICS - size_t ptl_bytes_sent; - size_t ptl_bytes_recv; - size_t ptl_send_handler; -#endif -}; -typedef struct mca_ptl_tcp_module_t mca_ptl_tcp_module_t; - -extern mca_ptl_tcp_module_t mca_ptl_tcp_module; - - -/** - * Cleanup any resources held by the PTL. - * - * @param ptl PTL instance. - * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_ptl_tcp_finalize( - struct mca_ptl_base_module_t* ptl -); - - -/** - * PML->PTL notification of change in the process list. - * - * @param ptl (IN) - * @param nprocs (IN) Number of processes - * @param procs (IN) Set of processes - * @param peers (OUT) Set of (optional) peer addressing info. - * @param peers (IN/OUT) Set of processes that are reachable via this PTL. - * @return OMPI_SUCCESS or error status on failure. - * - */ - -extern int mca_ptl_tcp_add_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers, - struct ompi_bitmap_t* reachable -); - - -/** - * PML->PTL notification of change in the process list. - * - * @param ptl (IN) PTL instance - * @param nproc (IN) Number of processes. - * @param procs (IN) Set of processes. - * @param peers (IN) Set of peer data structures. - * @return Status indicating if cleanup was successful - * - */ -extern int mca_ptl_tcp_del_procs( - struct mca_ptl_base_module_t* ptl, - size_t nprocs, - struct ompi_proc_t **procs, - struct mca_ptl_base_peer_t** peers -); - -/** - * PML->PTL Initialize a send request for TCP cache. 
- * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - */ -extern int mca_ptl_tcp_request_init( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* -); - -/** - * PML->PTL Cleanup a send request that is being removed from the cache. - * - * @param ptl (IN) PTL instance - * @param request (IN) Pointer to allocated request. - * - */ -extern void mca_ptl_tcp_request_fini( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_send_request_t* -); - -/** - * PML->PTL Notification that a receive fragment has been matched. - * - * @param ptl (IN) PTL instance - * @param recv_frag (IN) Receive fragment - * - */ -extern void mca_ptl_tcp_matched( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_recv_frag_t* frag -); - -/** - * PML->PTL Initiate a send of the specified size. - * - * @param ptl (IN) PTL instance - * @param ptl_base_peer (IN) PTL peer addressing - * @param send_request (IN/OUT) Send request (initialized by PML via mca_ptl_base_request_init_fn_t) - * @param size (IN) Number of bytes PML is requesting PTL to deliver - * @param flags (IN) Flags that should be passed to the peer via the message header. - * @param request (OUT) OMPI_SUCCESS if the PTL was able to queue one or more fragments - */ -extern int mca_ptl_tcp_send( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - struct mca_ptl_base_send_request_t*, - size_t offset, - size_t size, - int flags -); - -/** - * Return a recv fragment to the modules free list. - * - * @param ptl (IN) PTL instance - * @param frag (IN) TCP receive fragment - * - */ -extern void mca_ptl_tcp_recv_frag_return( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_tcp_recv_frag_t* frag -); - - - -/** - * Return a send fragment to the modules free list. 
- * - * @param ptl (IN) PTL instance - * @param frag (IN) TCP send fragment - * - */ -extern void mca_ptl_tcp_send_frag_return( - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_tcp_send_frag_t* -); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_addr.h b/ompi/mca/ptl/tcp/ptl_tcp_addr.h deleted file mode 100644 index f03c97a927..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_addr.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_TCP_ADDR_H -#define MCA_PTL_TCP_ADDR_H - -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif - - -/** - * Structure used to publish TCP connection information to peers. - */ -struct mca_ptl_tcp_addr_t { - struct in_addr addr_inet; /**< IPv4 address in network byte order */ - in_port_t addr_port; /**< listen port */ - unsigned short addr_inuse; /**< local meaning only */ -}; -typedef struct mca_ptl_tcp_addr_t mca_ptl_tcp_addr_t; - -#endif - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_component.c b/ompi/mca/ptl/tcp/ptl_tcp_component.c deleted file mode 100644 index 9dbca255d0..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_component.c +++ /dev/null @@ -1,642 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include "orte/orte_socket_errno.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#ifdef HAVE_ARPA_INET_H -#include -#endif - -#include "ompi/constants.h" -#include "opal/event/event.h" -#include "opal/util/if.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "opal/mca/base/mca_base_param.h" -#include "ompi/mca/pml/base/pml_base_module_exchange.h" -#include "ptl_tcp.h" -#include "ptl_tcp_addr.h" -#include "ptl_tcp_proc.h" -#include "ptl_tcp_recvfrag.h" -#include "ptl_tcp_sendfrag.h" -#include "ptl_tcp_sendreq.h" - -#define IMPORTANT_WINDOWS_COMMENT() \ - /* In windows, many of the socket functions return an EWOULDBLOCK instead of \ - things like EAGAIN, EINPROGRESS, etc. It has been verified that this will \ - not conflict with other error codes that are returned by these functions \ - under UNIX/Linux environments */ - -/* - * Data structure for accepting connections. 
- */ - -struct mca_ptl_tcp_event_t { - opal_list_item_t item; - opal_event_t event; -}; -typedef struct mca_ptl_tcp_event_t mca_ptl_tcp_event_t; - -static void mca_ptl_tcp_event_construct(mca_ptl_tcp_event_t* event) -{ - OPAL_THREAD_LOCK(&mca_ptl_tcp_component.tcp_lock); - opal_list_append(&mca_ptl_tcp_component.tcp_events, &event->item); - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); -} - -static void mca_ptl_tcp_event_destruct(mca_ptl_tcp_event_t* event) -{ - OPAL_THREAD_LOCK(&mca_ptl_tcp_component.tcp_lock); - opal_list_remove_item(&mca_ptl_tcp_component.tcp_events, &event->item); - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); -} - -OBJ_CLASS_INSTANCE( - mca_ptl_tcp_event_t, - opal_list_item_t, - mca_ptl_tcp_event_construct, - mca_ptl_tcp_event_destruct); - - -/* - * The PTL TCP component - */ - -mca_ptl_tcp_component_t mca_ptl_tcp_component = { - { - /* First, the mca_base_module_t struct containing meta - information about the module itself */ - { - /* Indicate that we are a pml v1.0.0 module (which also - implies a specific MCA version) */ - - MCA_PTL_BASE_VERSION_1_0_0, - - "tcp", /* MCA module name */ - OMPI_MAJOR_VERSION, /* MCA module major version */ - OMPI_MINOR_VERSION, /* MCA module minor version */ - OMPI_RELEASE_VERSION, /* MCA module release version */ - mca_ptl_tcp_component_open, /* module open */ - mca_ptl_tcp_component_close /* module close */ - }, - - /* Next the MCA v1.0.0 module meta data */ - - { - /* Whether the module is checkpointable or not */ - - false - }, - - mca_ptl_tcp_component_init, - mca_ptl_tcp_component_control, - NULL /*mca_ptl_tcp_component_progress*/, - } -}; - -/* - * functions for receiving event callbacks - */ - -static void mca_ptl_tcp_component_recv_handler(int, short, void*); - - -/* - * utility routines for parameter registration - */ - -static inline char* mca_ptl_tcp_param_register_string( - const char* param_name, - const char* default_value) -{ - char *param_value; - int id = 
mca_base_param_register_string("ptl","tcp",param_name,NULL,default_value); - mca_base_param_lookup_string(id, ¶m_value); - return param_value; -} - -static inline int mca_ptl_tcp_param_register_int( - const char* param_name, - int default_value) -{ - int id = mca_base_param_register_int("ptl","tcp",param_name,NULL,default_value); - int param_value = default_value; - mca_base_param_lookup_int(id,¶m_value); - return param_value; -} - -/* - * Called by MCA framework to open the module, registers - * module parameters. - */ - -int mca_ptl_tcp_component_open(void) -{ -#ifdef __WINDOWS__ - WSADATA win_sock_data; - if (WSAStartup(MAKEWORD(2,2), &win_sock_data) != 0) { - opal_output (0, "mca_ptl_tcp_component_init: failed to initialise windows sockets:%d\n", WSAGetLastError()); - return OMPI_ERROR; - } -#endif - - /* initialize state */ - mca_ptl_tcp_component.tcp_listen_sd = -1; - mca_ptl_tcp_component.tcp_ptl_modules = NULL; - mca_ptl_tcp_component.tcp_num_ptl_modules = 0; - - /* initialize objects */ - OBJ_CONSTRUCT(&mca_ptl_tcp_component.tcp_lock, opal_mutex_t); - OBJ_CONSTRUCT(&mca_ptl_tcp_component.tcp_procs, opal_hash_table_t); - OBJ_CONSTRUCT(&mca_ptl_tcp_component.tcp_pending_acks, opal_list_t); - OBJ_CONSTRUCT(&mca_ptl_tcp_component.tcp_events, opal_list_t); - OBJ_CONSTRUCT(&mca_ptl_tcp_component.tcp_send_frags, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_ptl_tcp_component.tcp_recv_frags, ompi_free_list_t); - opal_hash_table_init(&mca_ptl_tcp_component.tcp_procs, 256); - - /* register TCP module parameters */ - mca_ptl_tcp_component.tcp_if_include = - mca_ptl_tcp_param_register_string("if_include", ""); - mca_ptl_tcp_component.tcp_if_exclude = - mca_ptl_tcp_param_register_string("if_exclude", "lo"); - mca_ptl_tcp_component.tcp_free_list_num = - mca_ptl_tcp_param_register_int("free_list_num", 256); - mca_ptl_tcp_component.tcp_free_list_max = - mca_ptl_tcp_param_register_int("free_list_max", -1); - mca_ptl_tcp_component.tcp_free_list_inc = - 
mca_ptl_tcp_param_register_int("free_list_inc", 256); - mca_ptl_tcp_component.tcp_sndbuf = - mca_ptl_tcp_param_register_int("sndbuf", 128*1024); - mca_ptl_tcp_component.tcp_rcvbuf = - mca_ptl_tcp_param_register_int("rcvbuf", 128*1024); - mca_ptl_tcp_module.super.ptl_exclusivity = - mca_ptl_tcp_param_register_int("exclusivity", 0); - mca_ptl_tcp_module.super.ptl_first_frag_size = - mca_ptl_tcp_param_register_int("first_frag_size", 64*1024); - mca_ptl_tcp_module.super.ptl_min_frag_size = - mca_ptl_tcp_param_register_int("min_frag_size", 64*1024); - mca_ptl_tcp_module.super.ptl_max_frag_size = - mca_ptl_tcp_param_register_int("max_frag_size", -1); - /* the tcp allocator will never allocate buffers with more than this size */ - mca_ptl_tcp_component.tcp_frag_size = - mca_ptl_tcp_param_register_int("frag_size", 64*1024); - /* adapt the first fragment size to fit with the allowed fragment size */ - if( (mca_ptl_tcp_component.tcp_frag_size != 0) && - (mca_ptl_tcp_module.super.ptl_first_frag_size > mca_ptl_tcp_component.tcp_frag_size) ) { - mca_ptl_tcp_module.super.ptl_first_frag_size = mca_ptl_tcp_component.tcp_frag_size; - } - return OMPI_SUCCESS; -} - -/* - * module cleanup - sanity checking of queue lengths - */ - -int mca_ptl_tcp_component_close(void) -{ - opal_list_item_t* item; -#ifdef __WINDOWS__ - WSACleanup(); -#endif - - if(NULL != mca_ptl_tcp_component.tcp_if_include) - free(mca_ptl_tcp_component.tcp_if_include); - if(NULL != mca_ptl_tcp_component.tcp_if_exclude) - free(mca_ptl_tcp_component.tcp_if_exclude); - if (NULL != mca_ptl_tcp_component.tcp_ptl_modules) - free(mca_ptl_tcp_component.tcp_ptl_modules); - - if (mca_ptl_tcp_component.tcp_listen_sd >= 0) { - opal_event_del(&mca_ptl_tcp_component.tcp_recv_event); - close(mca_ptl_tcp_component.tcp_listen_sd); - mca_ptl_tcp_component.tcp_listen_sd = -1; - } - - /* cleanup any pending events */ - OPAL_THREAD_LOCK(&mca_ptl_tcp_component.tcp_lock); - for(item = 
opal_list_remove_first(&mca_ptl_tcp_component.tcp_events); - item != NULL; - item = opal_list_remove_first(&mca_ptl_tcp_component.tcp_events)) { - mca_ptl_tcp_event_t* event = (mca_ptl_tcp_event_t*)item; - opal_event_del(&event->event); - OBJ_RELEASE(event); - } - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); - - /* release resources */ - OBJ_DESTRUCT(&mca_ptl_tcp_component.tcp_procs); - OBJ_DESTRUCT(&mca_ptl_tcp_component.tcp_pending_acks); - OBJ_DESTRUCT(&mca_ptl_tcp_component.tcp_events); - OBJ_DESTRUCT(&mca_ptl_tcp_component.tcp_send_frags); - OBJ_DESTRUCT(&mca_ptl_tcp_component.tcp_recv_frags); - OBJ_DESTRUCT(&mca_ptl_tcp_component.tcp_lock); - return OMPI_SUCCESS; -} - - -/* - * Create a ptl instance and add to modules list. - */ - -static int mca_ptl_tcp_create(int if_index, const char* if_name) -{ - mca_ptl_tcp_module_t* ptl = (mca_ptl_tcp_module_t *)malloc(sizeof(mca_ptl_tcp_module_t)); - char param[256]; - if(NULL == ptl) - return OMPI_ERR_OUT_OF_RESOURCE; - memcpy(ptl, &mca_ptl_tcp_module, sizeof(mca_ptl_tcp_module)); - OBJ_CONSTRUCT(&ptl->ptl_peers, opal_list_t); - mca_ptl_tcp_component.tcp_ptl_modules[mca_ptl_tcp_component.tcp_num_ptl_modules++] = ptl; - - /* initialize the ptl */ - ptl->ptl_ifindex = if_index; -#if MCA_PTL_TCP_STATISTICS - ptl->ptl_bytes_recv = 0; - ptl->ptl_bytes_sent = 0; - ptl->ptl_send_handler = 0; -#endif - opal_ifindextoaddr(if_index, (struct sockaddr*)&ptl->ptl_ifaddr, sizeof(ptl->ptl_ifaddr)); - opal_ifindextomask(if_index, (struct sockaddr*)&ptl->ptl_ifmask, sizeof(ptl->ptl_ifmask)); - - /* allow user to specify interface bandwidth */ - sprintf(param, "bandwidth_%s", if_name); - ptl->super.ptl_bandwidth = mca_ptl_tcp_param_register_int(param, 0); - - /* allow user to override/specify latency ranking */ - sprintf(param, "latency_%s", if_name); - ptl->super.ptl_latency = mca_ptl_tcp_param_register_int(param, 0); - -#if OMPI_ENABLE_DEBUG && 0 - opal_output(0,"interface: %s bandwidth %d latency %d\n", - if_name, 
ptl->super.ptl_bandwidth, ptl->super.ptl_latency); -#endif - return OMPI_SUCCESS; -} - -/* - * Create a TCP PTL instance for either: - * (1) all interfaces specified by the user - * (2) all available interfaces - * (3) all available interfaces except for those excluded by the user - */ - -static int mca_ptl_tcp_component_create_instances(void) -{ - int if_count = opal_ifcount(); - int if_index; - char **include; - char **exclude; - char **argv; - - if(if_count <= 0) - return OMPI_ERROR; - - /* allocate memory for ptls */ - mca_ptl_tcp_component.tcp_max_ptl_modules = if_count; - mca_ptl_tcp_component.tcp_ptl_modules = (mca_ptl_tcp_module_t **)malloc(if_count * sizeof(mca_ptl_tcp_module_t*)); - if(NULL == mca_ptl_tcp_component.tcp_ptl_modules) - return OMPI_ERR_OUT_OF_RESOURCE; - - /* if the user specified an interface list - use these exclusively */ - argv = include = opal_argv_split(mca_ptl_tcp_component.tcp_if_include,','); - while(argv && *argv) { - char* if_name = *argv; - int if_index = opal_ifnametoindex(if_name); - if(if_index < 0) { - opal_output(0,"mca_ptl_tcp_component_init: invalid interface \"%s\"", if_name); - } else { - mca_ptl_tcp_create(if_index, if_name); - } - argv++; - } - opal_argv_free(include); - if(mca_ptl_tcp_component.tcp_num_ptl_modules) - return OMPI_SUCCESS; - - /* if the interface list was not specified by the user, create - * a PTL for each interface that was not excluded. 
- */ - exclude = opal_argv_split(mca_ptl_tcp_component.tcp_if_exclude,','); - for(if_index = opal_ifbegin(); if_index >= 0; if_index = opal_ifnext(if_index)) { - char if_name[32]; - opal_ifindextoname(if_index, if_name, sizeof(if_name)); - - /* check to see if this interface exists in the exclude list */ - if(opal_ifcount() > 1) { - argv = exclude; - while(argv && *argv) { - if(strncmp(*argv,if_name,strlen(*argv)) == 0) - break; - argv++; - } - /* if this interface was not found in the excluded list - create a PTL */ - if(argv == 0 || *argv == 0) { - mca_ptl_tcp_create(if_index, if_name); - } - } else { - mca_ptl_tcp_create(if_index, if_name); - } - } - opal_argv_free(exclude); - return OMPI_SUCCESS; -} - -/* - * Create a listen socket and bind to all interfaces - */ - -static int mca_ptl_tcp_component_create_listen(void) -{ - int flags; - struct sockaddr_in inaddr; - ompi_socklen_t addrlen; - - /* create a listen socket for incoming connections */ - mca_ptl_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0); - if(mca_ptl_tcp_component.tcp_listen_sd < 0) { - opal_output(0,"mca_ptl_tcp_component_init: socket() failed with errno=%d", ompi_socket_errno); - return OMPI_ERROR; - } - mca_ptl_tcp_set_socket_options(mca_ptl_tcp_component.tcp_listen_sd); - - /* bind to all addresses and dynamically assigned port */ - memset(&inaddr, 0, sizeof(inaddr)); - inaddr.sin_family = AF_INET; - inaddr.sin_addr.s_addr = INADDR_ANY; - inaddr.sin_port = 0; - - if(bind(mca_ptl_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) { - opal_output(0,"mca_ptl_tcp_component_init: bind() failed with errno=%d", ompi_socket_errno); - return OMPI_ERROR; - } - - /* resolve system assignend port */ - addrlen = sizeof(struct sockaddr_in); - if(getsockname(mca_ptl_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) { - opal_output(0, "mca_ptl_tcp_component_init: getsockname() failed with errno=%d", ompi_socket_errno); - return OMPI_ERROR; - } - 
mca_ptl_tcp_component.tcp_listen_port = inaddr.sin_port; - - /* setup listen backlog to maximum allowed by kernel */ - if(listen(mca_ptl_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) { - opal_output(0, "mca_ptl_tcp_component_init: listen() failed with errno=%d", ompi_socket_errno); - return OMPI_ERROR; - } - - /* set socket up to be non-blocking, otherwise accept could block */ - if((flags = fcntl(mca_ptl_tcp_component.tcp_listen_sd, F_GETFL, 0)) < 0) { - opal_output(0, "mca_ptl_tcp_component_init: fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno); - return OMPI_ERROR; - } else { - flags |= O_NONBLOCK; - if(fcntl(mca_ptl_tcp_component.tcp_listen_sd, F_SETFL, flags) < 0) { - opal_output(0, "mca_ptl_tcp_component_init: fcntl(F_SETFL) failed with errno=%d", ompi_socket_errno); - return OMPI_ERROR; - } - } - - /* register listen port */ - opal_event_set( - &mca_ptl_tcp_component.tcp_recv_event, - mca_ptl_tcp_component.tcp_listen_sd, - OPAL_EV_READ|OPAL_EV_PERSIST, - mca_ptl_tcp_component_recv_handler, - 0); - return OMPI_SUCCESS; -} - -/* - * Register TCP module addressing information. The MCA framework - * will make this available to all peers. 
- */ - -static int mca_ptl_tcp_component_exchange(void) -{ - int rc=0; - size_t i=0; - size_t size = mca_ptl_tcp_component.tcp_num_ptl_modules * sizeof(mca_ptl_tcp_addr_t); - if(mca_ptl_tcp_component.tcp_num_ptl_modules != 0) { - mca_ptl_tcp_addr_t *addrs = (mca_ptl_tcp_addr_t *)malloc(size); - for(i=0; iptl_ifaddr.sin_addr; - addrs[i].addr_port = mca_ptl_tcp_component.tcp_listen_port; - addrs[i].addr_inuse = 0; - } - rc = mca_pml_base_modex_send(&mca_ptl_tcp_component.super.ptlm_version, addrs, size); - free(addrs); - } - return rc; -} - -/* - * TCP module initialization: - * (1) read interface list from kernel and compare against module parameters - * then create a PTL instance for selected interfaces - * (2) setup TCP listen socket for incoming connection attempts - * (3) register PTL parameters with the MCA - */ -mca_ptl_base_module_t** mca_ptl_tcp_component_init(int *num_ptl_modules, - bool enable_progress_threads, - bool enable_mpi_threads) -{ - mca_ptl_base_module_t **ptls; - *num_ptl_modules = 0; - - ompi_free_list_init(&mca_ptl_tcp_component.tcp_send_frags, - sizeof(mca_ptl_tcp_send_frag_t), - OBJ_CLASS(mca_ptl_tcp_send_frag_t), - mca_ptl_tcp_component.tcp_free_list_num, - mca_ptl_tcp_component.tcp_free_list_max, - mca_ptl_tcp_component.tcp_free_list_inc, - NULL); /* use default allocator */ - - ompi_free_list_init(&mca_ptl_tcp_component.tcp_recv_frags, - sizeof(mca_ptl_tcp_recv_frag_t), - OBJ_CLASS(mca_ptl_tcp_recv_frag_t), - mca_ptl_tcp_component.tcp_free_list_num, - mca_ptl_tcp_component.tcp_free_list_max, - mca_ptl_tcp_component.tcp_free_list_inc, - NULL); /* use default allocator */ - - /* create a PTL TCP module for selected interfaces */ - if(mca_ptl_tcp_component_create_instances() != OMPI_SUCCESS) - return 0; - - /* create a TCP listen socket for incoming connection attempts */ - if(mca_ptl_tcp_component_create_listen() != OMPI_SUCCESS) - return 0; - - /* publish TCP parameters with the MCA framework */ - if(mca_ptl_tcp_component_exchange() != 
OMPI_SUCCESS) - return 0; - - ptls = (mca_ptl_base_module_t **)malloc(mca_ptl_tcp_component.tcp_num_ptl_modules * - sizeof(mca_ptl_base_module_t*)); - if(NULL == ptls) - return NULL; - - memcpy(ptls, mca_ptl_tcp_component.tcp_ptl_modules, mca_ptl_tcp_component.tcp_num_ptl_modules*sizeof(mca_ptl_tcp_module_t*)); - *num_ptl_modules = mca_ptl_tcp_component.tcp_num_ptl_modules; - - return ptls; -} - -/* - * TCP module control - */ - -int mca_ptl_tcp_component_control(int param, void* value, size_t size) -{ - switch(param) { - case MCA_PTL_ENABLE: - if(*(int*)value) { - opal_event_add(&mca_ptl_tcp_component.tcp_recv_event, 0); - if(opal_hash_table_get_size(&mca_ptl_tcp_component.tcp_procs) > 0) { - opal_progress_events(OPAL_EVLOOP_NONBLOCK); - } - } else { - opal_event_del(&mca_ptl_tcp_component.tcp_recv_event); - } - break; - default: - break; - } - return OMPI_SUCCESS; -} - - -/* - * TCP module progress. - */ - -int mca_ptl_tcp_component_progress(mca_ptl_tstamp_t tstamp) -{ - return OMPI_SUCCESS; -} - - -/* - * Called by mca_ptl_tcp_component_recv() when the TCP listen - * socket has pending connection requests. Accept incoming - * requests and queue for completion of the connection handshake. 
-*/ - - -static void mca_ptl_tcp_component_accept(void) -{ - while(true) { - ompi_socklen_t addrlen = sizeof(struct sockaddr_in); - struct sockaddr_in addr; - mca_ptl_tcp_event_t *event; - int sd = accept(mca_ptl_tcp_component.tcp_listen_sd, (struct sockaddr*)&addr, &addrlen); - if(sd < 0) { - IMPORTANT_WINDOWS_COMMENT(); - if(ompi_socket_errno == EINTR) - continue; - if(ompi_socket_errno != EAGAIN || ompi_socket_errno != EWOULDBLOCK) - opal_output(0, "mca_ptl_tcp_component_accept: accept() failed with errno %d.", ompi_socket_errno); - return; - } - mca_ptl_tcp_set_socket_options(sd); - - /* wait for receipt of peers process identifier to complete this connection */ - - event = OBJ_NEW(mca_ptl_tcp_event_t); - opal_event_set(&event->event, sd, OPAL_EV_READ, mca_ptl_tcp_component_recv_handler, event); - opal_event_add(&event->event, 0); - } -} - - -/* - * Event callback when there is data available on the registered - * socket to recv. - */ -static void mca_ptl_tcp_component_recv_handler(int sd, short flags, void* user) -{ - orte_process_name_t guid; - struct sockaddr_in addr; - int retval; - mca_ptl_tcp_proc_t* ptl_proc; - ompi_socklen_t addr_len = sizeof(addr); - mca_ptl_tcp_event_t *event = (mca_ptl_tcp_event_t *)user; - - /* accept new connections on the listen socket */ - if(mca_ptl_tcp_component.tcp_listen_sd == sd) { - mca_ptl_tcp_component_accept(); - return; - } - OBJ_RELEASE(event); - - /* recv the process identifier */ - retval = recv(sd, (char *)&guid, sizeof(guid), 0); - if(retval != sizeof(guid)) { - close(sd); - return; - } - - /* now set socket up to be non-blocking */ - if((flags = fcntl(sd, F_GETFL, 0)) < 0) { - opal_output(0, "mca_ptl_tcp_component_recv_handler: fcntl(F_GETFL) failed with errno=%d", ompi_socket_errno); - } else { - flags |= O_NONBLOCK; - if(fcntl(sd, F_SETFL, flags) < 0) { - opal_output(0, "mca_ptl_tcp_component_recv_handler: fcntl(F_SETFL) failed with errno=%d", ompi_socket_errno); - } - } - - /* lookup the corresponding process 
*/ - ptl_proc = mca_ptl_tcp_proc_lookup(&guid); - if(NULL == ptl_proc) { - opal_output(0, "mca_ptl_tcp_component_recv_handler: unable to locate process"); - close(sd); - return; - } - - /* lookup peer address */ - if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) { - opal_output(0, "mca_ptl_tcp_component_recv_handler: getpeername() failed with errno=%d", ompi_socket_errno); - close(sd); - return; - } - - /* are there any existing peer instances will to accept this connection */ - if(mca_ptl_tcp_proc_accept(ptl_proc, &addr, sd) == false) { - close(sd); - return; - } -} - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_peer.c b/ompi/mca/ptl/tcp/ptl_tcp_peer.c deleted file mode 100644 index 69e364f95f..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_peer.c +++ /dev/null @@ -1,677 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include "orte/orte_socket_errno.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#ifdef HAVE_NETINET_TCP_H -#include -#endif -#ifdef HAVE_ARPA_INET_H -#include -#endif - -#include "ompi/types.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "orte/mca/ns/ns.h" -#include "opal/util/output.h" -#include "ptl_tcp.h" -#include "ptl_tcp_addr.h" -#include "ptl_tcp_peer.h" -#include "ptl_tcp_proc.h" -#include "ptl_tcp_sendfrag.h" - -#define IMPORTANT_WINDOWS_COMMENT() \ - /* In windows, many of the socket functions return an EWOULDBLOCK instead of \ - things like EAGAIN, EINPROGRESS, etc. It has been verified that this will \ - not conflict with other error codes that are returned by these functions \ - under UNIX/Linux environments */ - -static void mca_ptl_tcp_peer_construct(mca_ptl_base_peer_t* ptl_peer); -static void mca_ptl_tcp_peer_destruct(mca_ptl_base_peer_t* ptl_peer); -static int mca_ptl_tcp_peer_start_connect(mca_ptl_base_peer_t*); -static void mca_ptl_tcp_peer_connected(mca_ptl_base_peer_t*); -static void mca_ptl_tcp_peer_recv_handler(int sd, short flags, void* user); -static void mca_ptl_tcp_peer_send_handler(int sd, short flags, void* user); - -/* - * Diagnostics: change this to "1" to enable the function - * mca_ptl_tcp_peer_dump(), below - */ -#define WANT_PEER_DUMP 0 - - -opal_class_t mca_ptl_tcp_peer_t_class = { - "mca_tcp_ptl_peer_t", - OBJ_CLASS(opal_list_item_t), - (opal_construct_t)mca_ptl_tcp_peer_construct, - (opal_destruct_t)mca_ptl_tcp_peer_destruct -}; - -/* - * Initialize state of the peer instance. 
- */ - -static void mca_ptl_tcp_peer_construct(mca_ptl_base_peer_t* ptl_peer) -{ - ptl_peer->peer_ptl = 0; - ptl_peer->peer_proc = 0; - ptl_peer->peer_addr = 0; - ptl_peer->peer_sd = -1; - ptl_peer->peer_send_frag = 0; - ptl_peer->peer_recv_frag = 0; - ptl_peer->peer_send_event.ev_flags = 0; - ptl_peer->peer_recv_event.ev_flags = 0; - ptl_peer->peer_state = MCA_PTL_TCP_CLOSED; - ptl_peer->peer_retries = 0; - ptl_peer->peer_nbo = false; - OBJ_CONSTRUCT(&ptl_peer->peer_frags, opal_list_t); - OBJ_CONSTRUCT(&ptl_peer->peer_send_lock, opal_mutex_t); - OBJ_CONSTRUCT(&ptl_peer->peer_recv_lock, opal_mutex_t); -} - -/* - * Cleanup any resources held by the peer. - */ - -static void mca_ptl_tcp_peer_destruct(mca_ptl_base_peer_t* ptl_peer) -{ - mca_ptl_tcp_proc_remove(ptl_peer->peer_proc, ptl_peer); - mca_ptl_tcp_peer_close(ptl_peer); - OBJ_DESTRUCT(&ptl_peer->peer_frags); - OBJ_DESTRUCT(&ptl_peer->peer_send_lock); - OBJ_DESTRUCT(&ptl_peer->peer_recv_lock); -} - -/* - * diagnostics - */ - -#if WANT_PEER_DUMP -static void mca_ptl_tcp_peer_dump(mca_ptl_base_peer_t* ptl_peer, const char* msg) -{ - char src[64]; - char dst[64]; - char buff[255]; - int sndbuf,rcvbuf,nodelay,flags; - struct sockaddr_in inaddr; - ompi_socklen_t optlen; - ompi_socklen_t addrlen = sizeof(struct sockaddr_in); - - getsockname(ptl_peer->peer_sd, (struct sockaddr*)&inaddr, &addrlen); - sprintf(src, "%s", inet_ntoa(inaddr.sin_addr)); - getpeername(ptl_peer->peer_sd, (struct sockaddr*)&inaddr, &addrlen); - sprintf(dst, "%s", inet_ntoa(inaddr.sin_addr)); - - if((flags = fcntl(ptl_peer->peer_sd, F_GETFL, 0)) < 0) { - opal_output(0, "mca_ptl_tcp_peer_connect: fcntl(F_GETFL) failed with errno=%d\n", ompi_socket_errno); - } - -#if defined(SO_SNDBUF) - optlen = sizeof(sndbuf); - if(getsockopt(ptl_peer->peer_sd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, &optlen) < 0) { - opal_output(0, "mca_ptl_tcp_peer_dump: SO_SNDBUF option: errno %d\n", ompi_socket_errno); - } -#else - sndbuf = -1; -#endif -#if defined(SO_RCVBUF) 
- optlen = sizeof(rcvbuf); - if(getsockopt(ptl_peer->peer_sd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, &optlen) < 0) { - opal_output(0, "mca_ptl_tcp_peer_dump: SO_RCVBUF option: errno %d\n", ompi_socket_errno); - } -#else - rcvbuf = -1; -#endif -#if defined(TCP_NODELAY) - optlen = sizeof(nodelay); - if(getsockopt(ptl_peer->peer_sd, IPPROTO_TCP, TCP_NODELAY, (char *)&nodelay, &optlen) < 0) { - opal_output(0, "mca_ptl_tcp_peer_dump: TCP_NODELAY option: errno %d\n", ompi_socket_errno); - } -#else - nodelay = 0; -#endif - - sprintf(buff, "%s: %s - %s nodelay %d sndbuf %d rcvbuf %d flags %08x\n", - msg, src, dst, nodelay, sndbuf, rcvbuf, flags); - opal_output(0, buff); -} -#endif - -/* - * Initialize events to be used by the peer instance for TCP select/poll callbacks. - */ - -static inline void mca_ptl_tcp_peer_event_init(mca_ptl_base_peer_t* ptl_peer, int sd) -{ - opal_event_set( - &ptl_peer->peer_recv_event, - ptl_peer->peer_sd, - OPAL_EV_READ|OPAL_EV_PERSIST, - mca_ptl_tcp_peer_recv_handler, - ptl_peer); - opal_event_set( - &ptl_peer->peer_send_event, - ptl_peer->peer_sd, - OPAL_EV_WRITE|OPAL_EV_PERSIST, - mca_ptl_tcp_peer_send_handler, - ptl_peer); -} - - -/* - * Attempt to send a fragment using a given peer. If the peer is not connected, - * queue the fragment and start the connection as required. 
- */ - -int mca_ptl_tcp_peer_send(mca_ptl_base_peer_t* ptl_peer, mca_ptl_tcp_send_frag_t* frag, int offset) -{ - int rc = OMPI_SUCCESS; - OPAL_THREAD_LOCK(&ptl_peer->peer_send_lock); - switch(ptl_peer->peer_state) { - case MCA_PTL_TCP_CONNECTING: - case MCA_PTL_TCP_CONNECT_ACK: - case MCA_PTL_TCP_CLOSED: - opal_list_append(&ptl_peer->peer_frags, (opal_list_item_t*)frag); - if(ptl_peer->peer_state == MCA_PTL_TCP_CLOSED) - rc = mca_ptl_tcp_peer_start_connect(ptl_peer); - break; - case MCA_PTL_TCP_FAILED: - rc = OMPI_ERR_UNREACH; - break; - case MCA_PTL_TCP_CONNECTED: - if (NULL != ptl_peer->peer_send_frag) { - opal_list_append(&ptl_peer->peer_frags, (opal_list_item_t*)frag); - } else if (offset == 0) { - if(mca_ptl_tcp_send_frag_handler(frag, ptl_peer->peer_sd)) { - OPAL_THREAD_UNLOCK(&ptl_peer->peer_send_lock); - mca_ptl_tcp_send_frag_progress(frag); - return rc; - } else { - ptl_peer->peer_send_frag = frag; - opal_event_add(&ptl_peer->peer_send_event, 0); - } - } else { - /* after the first fragment - delay sending subsequent fragments to - * enable better overlap by the scheduler - */ - ptl_peer->peer_send_frag = frag; - opal_event_add(&ptl_peer->peer_send_event, 0); - } - break; - case MCA_PTL_TCP_SHUTDOWN: - rc = OMPI_ERROR; - break; - } - OPAL_THREAD_UNLOCK(&ptl_peer->peer_send_lock); - return rc; -} - - -/* - * A blocking send on a non-blocking socket. Used to send the small amount of connection - * information that identifies the peers endpoint. 
- */ -static int mca_ptl_tcp_peer_send_blocking(mca_ptl_base_peer_t* ptl_peer, void* data, size_t size) -{ - unsigned char* ptr = (unsigned char*)data; - size_t cnt = 0; - while(cnt < size) { - int retval = send(ptl_peer->peer_sd, (const char *)ptr+cnt, size-cnt, 0); - if(retval < 0) { - IMPORTANT_WINDOWS_COMMENT(); - if(ompi_socket_errno != EINTR && ompi_socket_errno != EAGAIN && ompi_socket_errno != EWOULDBLOCK) { - opal_output(0, "mca_ptl_tcp_peer_send_blocking: send() failed with errno=%d\n",ompi_socket_errno); - mca_ptl_tcp_peer_close(ptl_peer); - return -1; - } - continue; - } - cnt += retval; - } - return cnt; -} - - -/* - * Send the globally unique identifier for this process to a peer on - * a newly connected socket. - */ - -static int mca_ptl_tcp_peer_send_connect_ack(mca_ptl_base_peer_t* ptl_peer) -{ - /* send process identifier to remote peer */ - mca_ptl_tcp_proc_t* ptl_proc = mca_ptl_tcp_proc_local(); - if(mca_ptl_tcp_peer_send_blocking(ptl_peer, &ptl_proc->proc_name, sizeof(ptl_proc->proc_name)) != - sizeof(ptl_proc->proc_name)) { - return OMPI_ERR_UNREACH; - } - return OMPI_SUCCESS; -} - -/* - * Check the state of this peer. 
If the incoming connection request matches - * our peers address, check the state of our connection: - * (1) if a connection has not been attempted, accept the connection - * (2) if a connection has not been established, and the peers process identifier - * is less than the local process, accept the connection - * otherwise, reject the connection and continue with the current connection - */ - -bool mca_ptl_tcp_peer_accept(mca_ptl_base_peer_t* ptl_peer, struct sockaddr_in* addr, int sd) -{ - mca_ptl_tcp_addr_t* ptl_addr; - mca_ptl_tcp_proc_t* this_proc = mca_ptl_tcp_proc_local(); - orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL; - int cmpval; - - OPAL_THREAD_LOCK(&ptl_peer->peer_recv_lock); - OPAL_THREAD_LOCK(&ptl_peer->peer_send_lock); - if((ptl_addr = ptl_peer->peer_addr) != NULL && - ptl_addr->addr_inet.s_addr == addr->sin_addr.s_addr) { - mca_ptl_tcp_proc_t *peer_proc = ptl_peer->peer_proc; - cmpval = orte_ns.compare(mask, - &peer_proc->proc_ompi->proc_name, - &this_proc->proc_ompi->proc_name); - if((ptl_peer->peer_sd < 0) || - (ptl_peer->peer_state != MCA_PTL_TCP_CONNECTED && - cmpval < 0)) { - mca_ptl_tcp_peer_close(ptl_peer); - ptl_peer->peer_sd = sd; - if(mca_ptl_tcp_peer_send_connect_ack(ptl_peer) != OMPI_SUCCESS) { - mca_ptl_tcp_peer_close(ptl_peer); - OPAL_THREAD_UNLOCK(&ptl_peer->peer_send_lock); - OPAL_THREAD_UNLOCK(&ptl_peer->peer_recv_lock); - return false; - } - mca_ptl_tcp_peer_event_init(ptl_peer, sd); - opal_event_add(&ptl_peer->peer_recv_event, 0); - mca_ptl_tcp_peer_connected(ptl_peer); -#if OMPI_ENABLE_DEBUG && WANT_PEER_DUMP - mca_ptl_tcp_peer_dump(ptl_peer, "accepted"); -#endif - OPAL_THREAD_UNLOCK(&ptl_peer->peer_send_lock); - OPAL_THREAD_UNLOCK(&ptl_peer->peer_recv_lock); - return true; - } - } - OPAL_THREAD_UNLOCK(&ptl_peer->peer_send_lock); - OPAL_THREAD_UNLOCK(&ptl_peer->peer_recv_lock); - return false; -} - - -/* - * Remove any event registrations associated with the socket - * and update the peer state to reflect the connection has - * 
been closed. - */ - -void mca_ptl_tcp_peer_close(mca_ptl_base_peer_t* ptl_peer) -{ - if(ptl_peer->peer_sd >= 0) { - opal_event_del(&ptl_peer->peer_recv_event); - opal_event_del(&ptl_peer->peer_send_event); - close(ptl_peer->peer_sd); - ptl_peer->peer_sd = -1; - } - ptl_peer->peer_state = MCA_PTL_TCP_CLOSED; - ptl_peer->peer_retries++; -} - -void mca_ptl_tcp_peer_shutdown(mca_ptl_base_peer_t* ptl_peer) -{ - OPAL_THREAD_LOCK(&ptl_peer->peer_recv_lock); - OPAL_THREAD_LOCK(&ptl_peer->peer_send_lock); - mca_ptl_tcp_peer_close(ptl_peer); - ptl_peer->peer_state = MCA_PTL_TCP_SHUTDOWN; - OPAL_THREAD_UNLOCK(&ptl_peer->peer_send_lock); - OPAL_THREAD_UNLOCK(&ptl_peer->peer_recv_lock); -} - - -/* - * Setup peer state to reflect that connection has been established, - * and start any pending sends. - */ - -static void mca_ptl_tcp_peer_connected(mca_ptl_base_peer_t* ptl_peer) -{ - /* setup socket options */ - ptl_peer->peer_state = MCA_PTL_TCP_CONNECTED; - ptl_peer->peer_retries = 0; - if(opal_list_get_size(&ptl_peer->peer_frags) > 0) { - if(NULL == ptl_peer->peer_send_frag) - ptl_peer->peer_send_frag = (mca_ptl_tcp_send_frag_t*) - opal_list_remove_first(&ptl_peer->peer_frags); - opal_event_add(&ptl_peer->peer_send_event, 0); - } -} - - -/* - * A blocking recv on a non-blocking socket. Used to receive the small amount of connection - * information that identifies the peers endpoint. 
- */ -static int mca_ptl_tcp_peer_recv_blocking(mca_ptl_base_peer_t* ptl_peer, void* data, size_t size) -{ - unsigned char* ptr = (unsigned char*)data; - size_t cnt = 0; - while(cnt < size) { - int retval = recv(ptl_peer->peer_sd, (char *)ptr+cnt, size-cnt, 0); - - /* remote closed connection */ - if(retval == 0) { - mca_ptl_tcp_peer_close(ptl_peer); - return -1; - } - - /* socket is non-blocking so handle errors */ - if(retval < 0) { - IMPORTANT_WINDOWS_COMMENT(); - if(ompi_socket_errno != EINTR && ompi_socket_errno != EAGAIN && ompi_socket_errno != EWOULDBLOCK) { - opal_output(0, "mca_ptl_tcp_peer_recv_blocking: recv() failed with errno=%d\n",ompi_socket_errno); - mca_ptl_tcp_peer_close(ptl_peer); - return -1; - } - continue; - } - cnt += retval; - } - if((int)cnt == -1) - opal_output(0, "mca_ptl_tcp_peer_recv_blocking: invalid cnt\n"); - return cnt; -} - - - -/* - * Receive the peers globally unique process identification from a newly - * connected socket and verify the expected response. If so, move the - * socket to a connected state. 
- */ - -static int mca_ptl_tcp_peer_recv_connect_ack(mca_ptl_base_peer_t* ptl_peer) -{ - orte_process_name_t guid; - mca_ptl_tcp_proc_t* ptl_proc = ptl_peer->peer_proc; - - if((mca_ptl_tcp_peer_recv_blocking(ptl_peer, &guid, sizeof(orte_process_name_t))) != sizeof(orte_process_name_t)) { - return OMPI_ERR_UNREACH; - } - - /* compare this to the expected values */ - if(memcmp(&ptl_proc->proc_name, &guid, sizeof(orte_process_name_t)) != 0) { - opal_output(0, "mca_ptl_tcp_peer_connect: received unexpected process identifier"); - mca_ptl_tcp_peer_close(ptl_peer); - return OMPI_ERR_UNREACH; - } - - /* connected */ - mca_ptl_tcp_peer_connected(ptl_peer); -#if OMPI_ENABLE_DEBUG && WANT_PEER_DUMP - mca_ptl_tcp_peer_dump(ptl_peer, "connected"); -#endif - return OMPI_SUCCESS; -} - - -void mca_ptl_tcp_set_socket_options(int sd) -{ - int optval; -#if defined(TCP_NODELAY) - optval = 1; - if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) { - opal_output(0, - "mca_ptl_tcp_set_socket_options: setsockopt(TCP_NODELAY) failed with errno=%d\n", - ompi_socket_errno); - } -#endif -#if defined(SO_SNDBUF) - if(mca_ptl_tcp_component.tcp_sndbuf > 0 && - setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_ptl_tcp_component.tcp_sndbuf, sizeof(int)) < 0) { - opal_output(0, - "mca_ptl_tcp_set_socket_options: SO_SNDBUF option: errno %d\n", - ompi_socket_errno); - } -#endif -#if defined(SO_RCVBUF) - if(mca_ptl_tcp_component.tcp_rcvbuf > 0 && - setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_ptl_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) { - opal_output(0, - "mca_ptl_tcp_set_socket_options: SO_RCVBUF option: errno %d\n", - ompi_socket_errno); - } -#endif -} - - - -/* - * Start a connection to the peer. This will likely not complete, - * as the socket is set to non-blocking, so register for event - * notification of connect completion. On connection we send - * our globally unique process identifier to the peer and wait for - * the peers response. 
- */ - -static int mca_ptl_tcp_peer_start_connect(mca_ptl_base_peer_t* ptl_peer) -{ - int rc,flags; - struct sockaddr_in peer_addr; - - ptl_peer->peer_sd = socket(AF_INET, SOCK_STREAM, 0); - if (ptl_peer->peer_sd < 0) { - ptl_peer->peer_retries++; - return OMPI_ERR_UNREACH; - } - - /* setup socket buffer sizes */ - mca_ptl_tcp_set_socket_options(ptl_peer->peer_sd); - - /* setup event callbacks */ - mca_ptl_tcp_peer_event_init(ptl_peer, ptl_peer->peer_sd); - - /* setup the socket as non-blocking */ - if((flags = fcntl(ptl_peer->peer_sd, F_GETFL, 0)) < 0) { - opal_output(0, "mca_ptl_tcp_peer_connect: fcntl(F_GETFL) failed with errno=%d\n", ompi_socket_errno); - } else { - flags |= O_NONBLOCK; - if(fcntl(ptl_peer->peer_sd, F_SETFL, flags) < 0) - opal_output(0, "mca_ptl_tcp_peer_connect: fcntl(F_SETFL) failed with errno=%d\n", ompi_socket_errno); - } - - /* start the connect - will likely fail with EINPROGRESS */ - peer_addr.sin_family = AF_INET; - peer_addr.sin_addr = ptl_peer->peer_addr->addr_inet; - peer_addr.sin_port = ptl_peer->peer_addr->addr_port; - if(connect(ptl_peer->peer_sd, (struct sockaddr*)&peer_addr, sizeof(peer_addr)) < 0) { - /* non-blocking so wait for completion */ - IMPORTANT_WINDOWS_COMMENT(); - if(ompi_socket_errno == EINPROGRESS || ompi_socket_errno == EWOULDBLOCK) { - ptl_peer->peer_state = MCA_PTL_TCP_CONNECTING; - opal_event_add(&ptl_peer->peer_send_event, 0); - return OMPI_SUCCESS; - } - mca_ptl_tcp_peer_close(ptl_peer); - ptl_peer->peer_retries++; - return OMPI_ERR_UNREACH; - } - - /* send our globally unique process identifier to the peer */ - if((rc = mca_ptl_tcp_peer_send_connect_ack(ptl_peer)) == OMPI_SUCCESS) { - ptl_peer->peer_state = MCA_PTL_TCP_CONNECT_ACK; - opal_event_add(&ptl_peer->peer_recv_event, 0); - } else { - mca_ptl_tcp_peer_close(ptl_peer); - } - return rc; -} - - -/* - * Check the status of the connection. If the connection failed, will retry - * later. 
Otherwise, send this processes identifier to the peer on the - * newly connected socket. - */ - -static void mca_ptl_tcp_peer_complete_connect(mca_ptl_base_peer_t* ptl_peer) -{ - int so_error = 0; - ompi_socklen_t so_length = sizeof(so_error); - - /* unregister from receiving event notifications */ - opal_event_del(&ptl_peer->peer_send_event); - - /* check connect completion status */ - if(getsockopt(ptl_peer->peer_sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) { - opal_output(0, "mca_ptl_tcp_peer_complete_connect: getsockopt() failed with errno=%d\n", ompi_socket_errno); - mca_ptl_tcp_peer_close(ptl_peer); - return; - } - IMPORTANT_WINDOWS_COMMENT(); - if(so_error == EINPROGRESS || so_error == EWOULDBLOCK) { - opal_event_add(&ptl_peer->peer_send_event, 0); - return; - } - if(so_error != 0) { - opal_output(0, "mca_ptl_tcp_peer_complete_connect: connect() failed with errno=%d\n", so_error); - mca_ptl_tcp_peer_close(ptl_peer); - return; - } - - if(mca_ptl_tcp_peer_send_connect_ack(ptl_peer) == OMPI_SUCCESS) { - ptl_peer->peer_state = MCA_PTL_TCP_CONNECT_ACK; - opal_event_add(&ptl_peer->peer_recv_event, 0); - } else { - mca_ptl_tcp_peer_close(ptl_peer); - } -} - - -/* - * A file descriptor is available/ready for recv. Check the state - * of the socket and take the appropriate action. 
- */ - -static void mca_ptl_tcp_peer_recv_handler(int sd, short flags, void* user) -{ - mca_ptl_base_peer_t* ptl_peer = (mca_ptl_base_peer_t *)user; - OPAL_THREAD_LOCK(&ptl_peer->peer_recv_lock); - switch(ptl_peer->peer_state) { - case MCA_PTL_TCP_CONNECT_ACK: - { - mca_ptl_tcp_peer_recv_connect_ack(ptl_peer); - break; - } - case MCA_PTL_TCP_CONNECTED: - { - mca_ptl_tcp_recv_frag_t* recv_frag = ptl_peer->peer_recv_frag; - if(NULL == recv_frag) { - int rc; - MCA_PTL_TCP_RECV_FRAG_ALLOC(recv_frag, rc); - if(NULL == recv_frag) { - OPAL_THREAD_UNLOCK(&ptl_peer->peer_recv_lock); - return; - } - mca_ptl_tcp_recv_frag_init(recv_frag, ptl_peer); - } - - /* check for completion of non-blocking recv on the current fragment */ - if(mca_ptl_tcp_recv_frag_handler(recv_frag, sd) == false) - ptl_peer->peer_recv_frag = recv_frag; - else - ptl_peer->peer_recv_frag = 0; - break; - } - case MCA_PTL_TCP_SHUTDOWN: - { - break; - } - default: - { - opal_output(0, "mca_ptl_tcp_peer_recv_handler: invalid socket state(%d)", ptl_peer->peer_state); - mca_ptl_tcp_peer_close(ptl_peer); - break; - } - } - OPAL_THREAD_UNLOCK(&ptl_peer->peer_recv_lock); -} - - -/* - * A file descriptor is available/ready for send. Check the state - * of the socket and take the appropriate action. 
- */ - -static void mca_ptl_tcp_peer_send_handler(int sd, short flags, void* user) -{ - mca_ptl_tcp_peer_t* ptl_peer = (mca_ptl_tcp_peer_t *)user; - OPAL_THREAD_LOCK(&ptl_peer->peer_send_lock); - switch(ptl_peer->peer_state) { - case MCA_PTL_TCP_CONNECTING: - mca_ptl_tcp_peer_complete_connect(ptl_peer); - break; - case MCA_PTL_TCP_CONNECTED: - { - /* complete the current send */ - do { - mca_ptl_tcp_send_frag_t* frag = ptl_peer->peer_send_frag; - if(mca_ptl_tcp_send_frag_handler(frag, ptl_peer->peer_sd) == false) { - break; - } - - /* if required - update request status and release fragment */ - OPAL_THREAD_UNLOCK(&ptl_peer->peer_send_lock); - mca_ptl_tcp_send_frag_progress(frag); - OPAL_THREAD_LOCK(&ptl_peer->peer_send_lock); - - /* progress any pending sends */ - ptl_peer->peer_send_frag = (mca_ptl_tcp_send_frag_t*) - opal_list_remove_first(&ptl_peer->peer_frags); - } while (NULL != ptl_peer->peer_send_frag); - - /* if nothing else to do unregister for send event notifications */ - if(NULL == ptl_peer->peer_send_frag) { - opal_event_del(&ptl_peer->peer_send_event); - } - break; - } - default: - opal_output(0, "mca_ptl_tcp_peer_send_handler: invalid connection state (%d)", - ptl_peer->peer_state); - opal_event_del(&ptl_peer->peer_send_event); - break; - } - OPAL_THREAD_UNLOCK(&ptl_peer->peer_send_lock); -} - - - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_peer.h b/ompi/mca/ptl/tcp/ptl_tcp_peer.h deleted file mode 100644 index 607c13aa4d..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_peer.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_TCP_PEER_H -#define MCA_PTL_TCP_PEER_H - -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include "opal/class/opal_list.h" -#include "opal/event/event.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/ptl/ptl.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * State of TCP peer connection. - */ - -typedef enum { - MCA_PTL_TCP_CLOSED, - MCA_PTL_TCP_CONNECTING, - MCA_PTL_TCP_CONNECT_ACK, - MCA_PTL_TCP_CONNECTED, - MCA_PTL_TCP_SHUTDOWN, - MCA_PTL_TCP_FAILED -} mca_ptl_tcp_state_t; - - -/** - * An abstraction that represents a connection to a peer process. - * An instance of mca_ptl_base_peer_t is associated w/ each process - * and PTL pair at startup. However, connections to the peer - * are established dynamically on an as-needed basis: -*/ -struct mca_ptl_base_peer_t { - opal_list_item_t super; - struct mca_ptl_tcp_module_t* peer_ptl; /**< PTL instance that created this connection */ - struct mca_ptl_tcp_proc_t* peer_proc; /**< proc structure corresponding to peer */ - struct mca_ptl_tcp_addr_t* peer_addr; /**< address of peer */ - int peer_sd; /**< socket connection to peer */ - struct mca_ptl_tcp_send_frag_t* peer_send_frag; /**< current send frag being processed */ - struct mca_ptl_tcp_recv_frag_t* peer_recv_frag; /**< current recv frag being processed */ - mca_ptl_tcp_state_t peer_state; /**< current state of the connection */ - size_t peer_retries; /**< number of connection retries attempted */ - opal_list_t peer_frags; /**< list of pending frags to send */ - opal_mutex_t peer_send_lock; /**< lock for concurrent access to peer state */ - opal_mutex_t peer_recv_lock; /**< lock for concurrent access to peer state */ - opal_event_t 
peer_send_event; /**< event for async processing of send frags */ - opal_event_t peer_recv_event; /**< event for async processing of recv frags */ - bool peer_nbo; /**< convert headers to network byte order? */ -}; -typedef struct mca_ptl_base_peer_t mca_ptl_base_peer_t; - -extern opal_class_t mca_ptl_tcp_peer_t_class; -typedef struct mca_ptl_base_peer_t mca_ptl_tcp_peer_t; - -void mca_ptl_tcp_set_socket_options(int sd); -void mca_ptl_tcp_peer_close(mca_ptl_base_peer_t*); -int mca_ptl_tcp_peer_send(mca_ptl_base_peer_t*, struct mca_ptl_tcp_send_frag_t*, int); -bool mca_ptl_tcp_peer_accept(mca_ptl_base_peer_t*, struct sockaddr_in*, int); -void mca_ptl_tcp_peer_shutdown(mca_ptl_base_peer_t*); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_proc.c b/ompi/mca/ptl/tcp/ptl_tcp_proc.c deleted file mode 100644 index 5bd3fc6fcc..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_proc.c +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "opal/sys/atomic.h" -#include "orte/class/orte_proc_table.h" -#include "ompi/mca/pml/base/pml_base_module_exchange.h" -#include "opal/util/output.h" -#include "ptl_tcp.h" -#include "ptl_tcp_addr.h" -#include "ptl_tcp_peer.h" -#include "ptl_tcp_proc.h" - - -static void mca_ptl_tcp_proc_construct(mca_ptl_tcp_proc_t* proc); -static void mca_ptl_tcp_proc_destruct(mca_ptl_tcp_proc_t* proc); - -OBJ_CLASS_INSTANCE( - mca_ptl_tcp_proc_t, - opal_list_item_t, - mca_ptl_tcp_proc_construct, - mca_ptl_tcp_proc_destruct -); - - -/** - * Initialize tcp proc instance - */ - -void mca_ptl_tcp_proc_construct(mca_ptl_tcp_proc_t* proc) -{ - proc->proc_ompi = 0; - proc->proc_addrs = 0; - proc->proc_addr_count = 0; - proc->proc_peers = 0; - proc->proc_peer_count = 0; - OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t); -} - - -/* - * Cleanup tcp proc instance - */ - -void mca_ptl_tcp_proc_destruct(mca_ptl_tcp_proc_t* proc) -{ - /* remove from list of all proc instances */ - OPAL_THREAD_LOCK(&mca_ptl_tcp_component.tcp_lock); - orte_hash_table_remove_proc(&mca_ptl_tcp_component.tcp_procs, &proc->proc_name); - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); - - /* release resources */ - if(NULL != proc->proc_peers) - free(proc->proc_peers); - OBJ_DESTRUCT(&proc->proc_lock); -} - - -/* - * Create a TCP process structure. There is a one-to-one correspondence - * between a ompi_proc_t and a mca_ptl_tcp_proc_t instance. We cache additional - * data (specifically the list of mca_ptl_tcp_peer_t instances, and publiched - * addresses) associated w/ a given destination on this datastructure. 
- */ - -mca_ptl_tcp_proc_t* mca_ptl_tcp_proc_create(ompi_proc_t* ompi_proc) -{ - int rc; - size_t size; - mca_ptl_tcp_proc_t* ptl_proc; - - OPAL_THREAD_LOCK(&mca_ptl_tcp_component.tcp_lock); - ptl_proc = (mca_ptl_tcp_proc_t*)orte_hash_table_get_proc( - &mca_ptl_tcp_component.tcp_procs, &ompi_proc->proc_name); - if(NULL != ptl_proc) { - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); - return ptl_proc; - } - - ptl_proc = OBJ_NEW(mca_ptl_tcp_proc_t); - if(NULL == ptl_proc) - return NULL; - ptl_proc->proc_ompi = ompi_proc; - ptl_proc->proc_name = ompi_proc->proc_name; - - /* add to hash table of all proc instance */ - orte_hash_table_set_proc( - &mca_ptl_tcp_component.tcp_procs, - &ptl_proc->proc_name, - ptl_proc); - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); - - /* lookup tcp parameters exported by this proc */ - rc = mca_pml_base_modex_recv( &mca_ptl_tcp_component.super.ptlm_version, - ompi_proc, - (void**)&ptl_proc->proc_addrs, - &size); - if(rc != OMPI_SUCCESS) { - opal_output(0, "mca_ptl_tcp_proc_create: mca_pml_base_modex_recv: failed with return value=%d", rc); - OBJ_RELEASE(ptl_proc); - return NULL; - } - if(0 != (size % sizeof(mca_ptl_tcp_addr_t))) { - opal_output(0, "mca_ptl_tcp_proc_create: mca_pml_base_modex_recv: invalid size %d\n", size); - return NULL; - } - ptl_proc->proc_addr_count = size / sizeof(mca_ptl_tcp_addr_t); - - /* allocate space for peer array - one for each exported address */ - ptl_proc->proc_peers = (mca_ptl_base_peer_t**) - malloc(ptl_proc->proc_addr_count * sizeof(mca_ptl_base_peer_t*)); - if(NULL == ptl_proc->proc_peers) { - OBJ_RELEASE(ptl_proc); - return NULL; - } - if(NULL == mca_ptl_tcp_component.tcp_local && ompi_proc == ompi_proc_local()) - mca_ptl_tcp_component.tcp_local = ptl_proc; - return ptl_proc; -} - - -/* - * Look for an existing TCP process instance based on the globally unique - * process identifier. 
- */ -mca_ptl_tcp_proc_t* mca_ptl_tcp_proc_lookup(const orte_process_name_t *name) -{ - mca_ptl_tcp_proc_t* proc; - OPAL_THREAD_LOCK(&mca_ptl_tcp_component.tcp_lock); - proc = (mca_ptl_tcp_proc_t*)orte_hash_table_get_proc( - &mca_ptl_tcp_component.tcp_procs, name); - OPAL_THREAD_UNLOCK(&mca_ptl_tcp_component.tcp_lock); - return proc; -} - -#ifdef HAVE_NETINET_IN_H -#include -#endif -#ifdef HAVE_ARPA_INET_H -#include -#endif - -/* - * Note that this routine must be called with the lock on the process already - * held. Insert a ptl instance into the proc array and assign it an address. - */ -int mca_ptl_tcp_proc_insert(mca_ptl_tcp_proc_t* ptl_proc, mca_ptl_base_peer_t* ptl_peer) -{ - struct mca_ptl_tcp_module_t *ptl_tcp = ptl_peer->peer_ptl; - size_t i; - unsigned long net1; - - /* insert into peer array */ - ptl_peer->peer_proc = ptl_proc; - ptl_proc->proc_peers[ptl_proc->proc_peer_count++] = ptl_peer; - - net1 = ptl_tcp->ptl_ifaddr.sin_addr.s_addr & ptl_tcp->ptl_ifmask.sin_addr.s_addr; - /* - * Look through the proc instance for an address that is on the - * directly attached network. If we don't find one, pick the first - * unused address. - */ - for(i=0; iproc_addr_count; i++) { - mca_ptl_tcp_addr_t* peer_addr = ptl_proc->proc_addrs + i; - unsigned long net2 = peer_addr->addr_inet.s_addr & ptl_tcp->ptl_ifmask.sin_addr.s_addr; - if(peer_addr->addr_inuse != 0) - continue; - if(net1 == net2) { - ptl_peer->peer_addr = peer_addr; - break; - } else if(ptl_peer->peer_addr != 0) - ptl_peer->peer_addr = peer_addr; - } - /* Make sure there is a common interface */ - if( NULL != ptl_peer->peer_addr ) { - ptl_peer->peer_addr->addr_inuse++; - return OMPI_SUCCESS; - } - return OMPI_ERR_UNREACH; -} - -/* - * Remove a peer from the proc array and indicate the address is - * no longer in use. 
- */ - -int mca_ptl_tcp_proc_remove(mca_ptl_tcp_proc_t* ptl_proc, mca_ptl_base_peer_t* ptl_peer) -{ - size_t i; - OPAL_THREAD_LOCK(&ptl_proc->proc_lock); - for(i=0; iproc_peer_count; i++) { - if(ptl_proc->proc_peers[i] == ptl_peer) { - memmove(ptl_proc->proc_peers+i, ptl_proc->proc_peers+i+1, - (ptl_proc->proc_peer_count-i-1)*sizeof(mca_ptl_base_peer_t*)); - if(--ptl_proc->proc_peer_count == 0) { - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - OBJ_RELEASE(ptl_proc); - return OMPI_SUCCESS; - } - ptl_peer->peer_addr->addr_inuse--; - break; - } - } - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - return OMPI_SUCCESS; -} - - -/* - * loop through all available PTLs for one matching the source address - * of the request. - */ -bool mca_ptl_tcp_proc_accept(mca_ptl_tcp_proc_t* ptl_proc, struct sockaddr_in* addr, int sd) -{ - size_t i; - OPAL_THREAD_LOCK(&ptl_proc->proc_lock); - for(i=0; iproc_peer_count; i++) { - mca_ptl_base_peer_t* ptl_peer = ptl_proc->proc_peers[i]; - if(mca_ptl_tcp_peer_accept(ptl_peer, addr, sd)) { - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - return true; - } - } - OPAL_THREAD_UNLOCK(&ptl_proc->proc_lock); - return false; -} - - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_proc.h b/ompi/mca/ptl/tcp/ptl_tcp_proc.h deleted file mode 100644 index f77b4a0365..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_proc.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_TCP_PROC_H -#define MCA_PTL_TCP_PROC_H - -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include "orte/mca/ns/ns_types.h" -#include "opal/class/opal_object.h" -#include "ompi/proc/proc.h" -#include "ptl_tcp.h" -#include "ptl_tcp_peer.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - - - -/** - * Represents the state of a remote process and the set of addresses - * that it exports. Also cache an instance of mca_ptl_base_peer_t for each - * PTL instance that attempts to open a connection to the process. - */ -struct mca_ptl_tcp_proc_t { - opal_list_item_t super; /**< allow proc to be placed on a list */ - ompi_proc_t *proc_ompi; /**< pointer to corresponding ompi_proc_t */ - orte_process_name_t proc_name; /**< globally unique identifier for the process */ - struct mca_ptl_tcp_addr_t *proc_addrs; /**< array of addresses published by peer */ - size_t proc_addr_count; /**< number of addresses published by peer */ - struct mca_ptl_base_peer_t **proc_peers; /**< array of peers that have been created to access this proc */ - size_t proc_peer_count; /**< number of peers */ - opal_mutex_t proc_lock; /**< lock to protect against concurrent access to proc state */ -}; -typedef struct mca_ptl_tcp_proc_t mca_ptl_tcp_proc_t; - - -OBJ_CLASS_DECLARATION(mca_ptl_tcp_proc_t); - - -mca_ptl_tcp_proc_t* mca_ptl_tcp_proc_create(ompi_proc_t* ompi_proc); -mca_ptl_tcp_proc_t* mca_ptl_tcp_proc_lookup(const orte_process_name_t*); - - -/** - * Inlined function to return local TCP proc instance. 
- */ - -static inline mca_ptl_tcp_proc_t* mca_ptl_tcp_proc_local(void) -{ - if(NULL == mca_ptl_tcp_component.tcp_local) - mca_ptl_tcp_component.tcp_local = mca_ptl_tcp_proc_create(ompi_proc_local()); - return mca_ptl_tcp_component.tcp_local; -} - -int mca_ptl_tcp_proc_insert(mca_ptl_tcp_proc_t*, struct mca_ptl_base_peer_t*); -int mca_ptl_tcp_proc_remove(mca_ptl_tcp_proc_t*, struct mca_ptl_base_peer_t*); -bool mca_ptl_tcp_proc_accept(mca_ptl_tcp_proc_t*, struct sockaddr_in*, int sd); -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_recvfrag.c b/ompi/mca/ptl/tcp/ptl_tcp_recvfrag.c deleted file mode 100644 index 911737d9ba..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_recvfrag.c +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#include "orte/orte_socket_errno.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "opal/util/output.h" -#include "ptl_tcp.h" -#include "ptl_tcp_peer.h" -#include "ptl_tcp_recvfrag.h" -#include "ptl_tcp_sendfrag.h" - - - -static void mca_ptl_tcp_recv_frag_construct(mca_ptl_tcp_recv_frag_t* frag); -static void mca_ptl_tcp_recv_frag_destruct(mca_ptl_tcp_recv_frag_t* frag); -static bool mca_ptl_tcp_recv_frag_header(mca_ptl_tcp_recv_frag_t* frag, int sd, size_t); -static bool mca_ptl_tcp_recv_frag_ack(mca_ptl_tcp_recv_frag_t* frag, int sd); -static bool mca_ptl_tcp_recv_frag_frag(mca_ptl_tcp_recv_frag_t* frag, int sd); -static bool mca_ptl_tcp_recv_frag_match(mca_ptl_tcp_recv_frag_t* frag, int sd); -static bool mca_ptl_tcp_recv_frag_data(mca_ptl_tcp_recv_frag_t* frag, int sd); -static bool mca_ptl_tcp_recv_frag_discard(mca_ptl_tcp_recv_frag_t* frag, int sd); - - -opal_class_t mca_ptl_tcp_recv_frag_t_class = { - "mca_ptl_tcp_recv_frag_t", - OBJ_CLASS(mca_ptl_base_recv_frag_t), - (opal_construct_t)mca_ptl_tcp_recv_frag_construct, - (opal_destruct_t)mca_ptl_tcp_recv_frag_destruct -}; - - -/* - * TCP fragment constructor - */ - -static void mca_ptl_tcp_recv_frag_construct(mca_ptl_tcp_recv_frag_t* frag) -{ -} - - -/* - * TCP fragment destructor - */ - -static void mca_ptl_tcp_recv_frag_destruct(mca_ptl_tcp_recv_frag_t* frag) -{ -} - -/* - * Callback from event library when socket has data available - * for receive. 
- */ - -bool mca_ptl_tcp_recv_frag_handler(mca_ptl_tcp_recv_frag_t* frag, int sd) -{ - /* read common header */ - if(frag->frag_hdr_cnt < sizeof(mca_ptl_base_header_t)) { - if(mca_ptl_tcp_recv_frag_header(frag, sd, sizeof(mca_ptl_base_header_t)) == false) - return false; - - /* convert this to host byte order if required */ - if(frag->frag_recv.frag_base.frag_peer->peer_nbo) { - /* note this field is only a byte - so doesn't matter what the byte ordering is */ - switch(frag->frag_recv.frag_base.frag_header.hdr_common.hdr_type) { - case MCA_PTL_HDR_TYPE_MATCH: - MCA_PTL_BASE_MATCH_HDR_NTOH(frag->frag_recv.frag_base.frag_header.hdr_match); - break; - case MCA_PTL_HDR_TYPE_RNDV: - MCA_PTL_BASE_RNDV_HDR_NTOH(frag->frag_recv.frag_base.frag_header.hdr_rndv); - break; - case MCA_PTL_HDR_TYPE_FRAG: - MCA_PTL_BASE_FRAG_HDR_NTOH(frag->frag_recv.frag_base.frag_header.hdr_frag); - break; - case MCA_PTL_HDR_TYPE_ACK: - case MCA_PTL_HDR_TYPE_NACK: - MCA_PTL_BASE_ACK_HDR_NTOH(frag->frag_recv.frag_base.frag_header.hdr_ack); - break; - default: - opal_output(0, "mca_ptl_tcp_recv_frag_handler: invalid message type: %08X", - *(unsigned long*)&frag->frag_recv.frag_base.frag_header); - return true; - } - } - if( (MCA_PTL_HDR_TYPE_MATCH == frag->frag_recv.frag_base.frag_header.hdr_common.hdr_type) || - (MCA_PTL_HDR_TYPE_RNDV == frag->frag_recv.frag_base.frag_header.hdr_common.hdr_type) ) { - /* first pass through - attempt a match */ - mca_ptl_base_module_t* ptl = frag->frag_recv.frag_base.frag_owner; - /* attempt to match a posted recv */ - if (ptl->ptl_match( ptl, &frag->frag_recv, - &frag->frag_recv.frag_base.frag_header.hdr_match)) { - mca_ptl_tcp_recv_frag_matched(frag, 0, frag->frag_recv.frag_base.frag_header.hdr_rndv.hdr_frag_length); - } else { - /* match was not made - so allocate buffer for eager send */ - if(frag->frag_recv.frag_base.frag_header.hdr_match.hdr_msg_length > 0) { - frag->frag_size = frag->frag_recv.frag_base.frag_header.hdr_rndv.hdr_frag_length; - 
frag->frag_recv.frag_base.frag_addr = malloc(frag->frag_size); - frag->frag_recv.frag_base.frag_size = frag->frag_size; - frag->frag_recv.frag_is_buffered = true; - } else { - frag->frag_recv.frag_base.frag_size = 0; - frag->frag_recv.frag_is_buffered = false; - frag->frag_size = 0; - } - } - } - } - - switch(frag->frag_recv.frag_base.frag_header.hdr_common.hdr_type) { - case MCA_PTL_HDR_TYPE_MATCH: - case MCA_PTL_HDR_TYPE_RNDV: - return mca_ptl_tcp_recv_frag_match(frag, sd); - case MCA_PTL_HDR_TYPE_FRAG: - return mca_ptl_tcp_recv_frag_frag(frag, sd); - case MCA_PTL_HDR_TYPE_ACK: - case MCA_PTL_HDR_TYPE_NACK: - return mca_ptl_tcp_recv_frag_ack(frag, sd); - default: - opal_output(0, "mca_ptl_tcp_recv_frag_handler: invalid message type: %08X", - *(unsigned long*)&frag->frag_recv.frag_base.frag_header); - return true; - } -} - -/* - * Receive fragment header - */ - -static bool mca_ptl_tcp_recv_frag_header(mca_ptl_tcp_recv_frag_t* frag, int sd, size_t size) -{ - /* non-blocking read - continue if interrupted, otherwise wait until data available */ - unsigned char* ptr = (unsigned char*)&frag->frag_recv.frag_base.frag_header; - while(frag->frag_hdr_cnt < size) { - int cnt = recv(sd, (char *)(ptr + frag->frag_hdr_cnt), size - frag->frag_hdr_cnt, 0); - if(cnt == 0) { - mca_ptl_tcp_peer_close(frag->frag_recv.frag_base.frag_peer); - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_recv_frags, (opal_list_item_t*)frag); - return false; - } - if(cnt < 0) { - switch(ompi_socket_errno) { - case EINTR: - continue; - case EWOULDBLOCK: - /* opal_output(0, "mca_ptl_tcp_recv_frag_header: EWOULDBLOCK\n"); */ - return false; - default: - opal_output(0, "mca_ptl_tcp_recv_frag_header: recv() failed with errno=%d", ompi_socket_errno); - mca_ptl_tcp_peer_close(frag->frag_recv.frag_base.frag_peer); - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_recv_frags, (opal_list_item_t*)frag); - return false; - } - } - frag->frag_hdr_cnt += cnt; -#if MCA_PTL_TCP_STATISTICS - 
((mca_ptl_tcp_module_t*)frag->frag_owner)->ptl_bytes_recv += cnt; -#endif - } - return true; -} - - -/* - * Receive and process an ack. - */ - -static bool mca_ptl_tcp_recv_frag_ack(mca_ptl_tcp_recv_frag_t* frag, int sd) -{ - mca_ptl_tcp_send_frag_t* sendfrag; - mca_ptl_base_send_request_t* sendreq; - sendfrag = (mca_ptl_tcp_send_frag_t*)frag->frag_recv.frag_base.frag_header.hdr_ack.hdr_src_ptr.pval; - sendreq = sendfrag->frag_send.frag_request; - sendreq->req_peer_match = frag->frag_recv.frag_base.frag_header.hdr_ack.hdr_dst_match; - mca_ptl_tcp_send_frag_progress(sendfrag); - mca_ptl_tcp_recv_frag_return(frag->frag_recv.frag_base.frag_owner, frag); - return true; -} - - -/* - * Receive and process a match request - first fragment. - */ - -static bool mca_ptl_tcp_recv_frag_match(mca_ptl_tcp_recv_frag_t* frag, int sd) -{ - /* receive fragment data */ - if(frag->frag_msg_cnt < frag->frag_recv.frag_base.frag_size) { - if(mca_ptl_tcp_recv_frag_data(frag, sd) == false) { - return false; - } - } - - /* discard any data that exceeds the posted receive */ - if(frag->frag_msg_cnt < frag->frag_size) { - if(mca_ptl_tcp_recv_frag_discard(frag, sd) == false) { - return false; - } - } - - mca_ptl_tcp_recv_frag_progress(frag); - return true; -} - - -/* - * Receive and process 2nd+ fragments of a multi-fragment message. 
- */ - -static bool mca_ptl_tcp_recv_frag_frag(mca_ptl_tcp_recv_frag_t* frag, int sd) -{ - /* get request from header */ - if(frag->frag_msg_cnt == 0) { - frag->frag_recv.frag_request = (mca_ptl_base_recv_request_t *)frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_dst_ptr.pval; - mca_ptl_tcp_recv_frag_matched(frag, - frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_offset, - frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_length); - } - - /* continue to receive user data */ - if(frag->frag_msg_cnt < frag->frag_recv.frag_base.frag_size) { - if(mca_ptl_tcp_recv_frag_data(frag, sd) == false) - return false; - } - - if(frag->frag_msg_cnt < frag->frag_size) { - if(mca_ptl_tcp_recv_frag_discard(frag, sd) == false) - return false; - } - - /* indicate completion status */ - mca_ptl_tcp_recv_frag_progress(frag); - return true; -} - - -/* - * Continue with non-blocking recv() calls until the entire - * fragment is received. - */ - -static bool mca_ptl_tcp_recv_frag_data(mca_ptl_tcp_recv_frag_t* frag, int sd) -{ - while(frag->frag_msg_cnt < frag->frag_recv.frag_base.frag_size) { - int cnt = recv(sd, (char*)frag->frag_recv.frag_base.frag_addr+frag->frag_msg_cnt, - frag->frag_recv.frag_base.frag_size-frag->frag_msg_cnt, 0); - if(cnt == 0) { - mca_ptl_tcp_peer_close(frag->frag_recv.frag_base.frag_peer); - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_recv_frags, (opal_list_item_t*)frag); - return false; - } - if(cnt < 0) { - switch(ompi_socket_errno) { - case EINTR: - continue; - case EWOULDBLOCK: - return false; - default: - opal_output(0, "mca_ptl_tcp_recv_frag_data: recv() failed with errno=%d", ompi_socket_errno); - mca_ptl_tcp_peer_close(frag->frag_recv.frag_base.frag_peer); - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_recv_frags, (opal_list_item_t*)frag); - return false; - } - } - frag->frag_msg_cnt += cnt; -#if MCA_PTL_TCP_STATISTICS - ((mca_ptl_tcp_module_t*)frag->frag_owner)->ptl_bytes_recv += cnt; -#endif - } - return true; -} - - -/* - * If 
the app posted a receive buffer smaller than the - * fragment, receive and discard remaining bytes. -*/ - -static bool mca_ptl_tcp_recv_frag_discard(mca_ptl_tcp_recv_frag_t* frag, int sd) -{ - while(frag->frag_msg_cnt < frag->frag_size) { - size_t count = frag->frag_size - frag->frag_msg_cnt; - void *rbuf = malloc(count); - int cnt = recv(sd, (char *)rbuf, count, 0); - free(rbuf); - if(cnt == 0) { - mca_ptl_tcp_peer_close(frag->frag_recv.frag_base.frag_peer); - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_recv_frags, (opal_list_item_t*)frag); - return false; - } - if(cnt < 0) { - switch(ompi_socket_errno) { - case EINTR: - continue; - case EWOULDBLOCK: - /* opal_output(0, "mca_ptl_tcp_recv_frag_discard: EWOULDBLOCK\n"); */ - return false; - default: - opal_output(0, "mca_ptl_tcp_recv_frag_discard: recv() failed with errno=%d", ompi_socket_errno); - mca_ptl_tcp_peer_close(frag->frag_recv.frag_base.frag_peer); - OMPI_FREE_LIST_RETURN(&mca_ptl_tcp_component.tcp_recv_frags, (opal_list_item_t*)frag); - return false; - } - } - frag->frag_msg_cnt += cnt; -#if MCA_PTL_TCP_STATISTICS - ((mca_ptl_tcp_module_t*)frag->frag_owner)->ptl_bytes_recv += cnt; -#endif - } - return true; -} - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_recvfrag.h b/ompi/mca/ptl/tcp/ptl_tcp_recvfrag.h deleted file mode 100644 index 2da8be6e64..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_recvfrag.h +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PTL_TCP_RECV_FRAG_H -#define MCA_PTL_TCP_RECV_FRAG_H - -#include "ompi_config.h" -#include -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include "opal/sys/atomic.h" -#include "ompi/mca/ptl/base/ptl_base_recvfrag.h" -#include "ptl_tcp_peer.h" -#include "ptl_tcp.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -extern opal_class_t mca_ptl_tcp_recv_frag_t_class; - -/** - * TCP received fragment derived type. - */ -struct mca_ptl_tcp_recv_frag_t { - mca_ptl_base_recv_frag_t frag_recv; /**< base receive fragment descriptor */ - size_t frag_hdr_cnt; /**< number of header bytes received */ - size_t frag_msg_cnt; /**< number of message bytes received */ - size_t frag_size; /**< size of the fragment on the wire */ - bool frag_ack_pending; /**< is an ack pending for this fragment */ - volatile int frag_progressed; /**< flag used to atomically progress fragment */ -}; -typedef struct mca_ptl_tcp_recv_frag_t mca_ptl_tcp_recv_frag_t; - - -#define MCA_PTL_TCP_RECV_FRAG_ALLOC(frag, rc) \ - { \ - opal_list_item_t* item; \ - OMPI_FREE_LIST_GET(&mca_ptl_tcp_component.tcp_recv_frags, item, rc); \ - frag = (mca_ptl_tcp_recv_frag_t*)item; \ - } - -bool mca_ptl_tcp_recv_frag_handler(mca_ptl_tcp_recv_frag_t*, int sd); - -/* - * Initialize a TCP receive fragment for a specific peer. 
- */ -static inline void mca_ptl_tcp_recv_frag_init(mca_ptl_tcp_recv_frag_t* frag, struct mca_ptl_base_peer_t* peer) -{ - frag->frag_recv.frag_base.frag_owner = &(peer->peer_ptl->super); - frag->frag_recv.frag_base.frag_addr = NULL; - frag->frag_recv.frag_base.frag_size = 0; - frag->frag_recv.frag_base.frag_peer = peer; - frag->frag_recv.frag_request = 0; - frag->frag_recv.frag_is_buffered = false; - frag->frag_hdr_cnt = 0; - frag->frag_msg_cnt = 0; - frag->frag_ack_pending = false; - frag->frag_progressed = 0; -} - -bool mca_ptl_tcp_recv_frag_send_ack(mca_ptl_tcp_recv_frag_t* frag); - -extern void* mca_ptl_tcp_memalloc( size_t* length, void* data ); - -static inline void mca_ptl_tcp_recv_frag_matched( - mca_ptl_tcp_recv_frag_t* frag, - size_t frag_offset, - size_t frag_length) -{ - mca_ptl_base_recv_request_t* request = frag->frag_recv.frag_request; - - /* if there is data associated with the fragment -- setup to receive */ - if(frag_length > 0) { - /* initialize receive convertor */ - ompi_convertor_clone( &(request->req_recv.req_convertor), - &(frag->frag_recv.frag_base.frag_convertor), 1 ); - ompi_convertor_personalize( &frag->frag_recv.frag_base.frag_convertor, 0, - &frag_offset, mca_ptl_tcp_memalloc, NULL ); - /* non-contiguous - allocate buffer for receive */ - if( 1 == ompi_convertor_need_buffers( &frag->frag_recv.frag_base.frag_convertor ) ) { - frag->frag_recv.frag_base.frag_addr = malloc(frag_length); - frag->frag_recv.frag_is_buffered = true; - /* determine offset into users buffer */ - } else { - long true_lb, true_extent; - - ompi_ddt_get_true_extent( request->req_recv.req_base.req_datatype, &true_lb, &true_extent ); - frag->frag_recv.frag_base.frag_addr = ((unsigned char*)request->req_recv.req_base.req_addr) + - frag_offset + true_lb; - } - frag->frag_size = frag_length; - if(frag_offset + frag_length > request->req_recv.req_bytes_packed) { - if(frag_offset > request->req_recv.req_bytes_packed) - frag->frag_recv.frag_base.frag_size = 0; - else - 
frag->frag_recv.frag_base.frag_size = request->req_recv.req_bytes_packed - frag_offset; - } else { - frag->frag_recv.frag_base.frag_size = frag_length; - } - } else { - frag->frag_size = 0; - frag->frag_recv.frag_base.frag_size = 0; - } -} - - -static inline void mca_ptl_tcp_recv_frag_progress(mca_ptl_tcp_recv_frag_t* frag) -{ - uint32_t iov_count; - size_t max_data; - int32_t freeAfter; - int32_t frag_progressed = opal_atomic_add_32(&frag->frag_progressed,1); - - /* For a match/rendezvous packet - we need to progress the fragment after - * the match has been made and all data has arrived. So this routine is - * called twice for this case. Otherwise, progress a fragment as soon as - * all of the data has been received. - */ - if((frag->frag_recv.frag_base.frag_header.hdr_common.hdr_type == MCA_PTL_HDR_TYPE_FRAG && - frag_progressed == 1 ) || - (frag->frag_recv.frag_base.frag_header.hdr_common.hdr_type != MCA_PTL_HDR_TYPE_FRAG && - frag_progressed == 2 )) { - - mca_ptl_base_recv_request_t* request = frag->frag_recv.frag_request; - if(frag->frag_recv.frag_is_buffered) { - mca_ptl_base_frag_header_t* header = &(frag)->frag_recv.frag_base.frag_header.hdr_frag; - size_t offset = (header->hdr_common.hdr_type == MCA_PTL_HDR_TYPE_FRAG) ? 
- header->hdr_frag_offset : 0; - - /* - * Initialize convertor and use it to unpack data - */ - struct iovec iov; - - ompi_convertor_clone( &(request->req_recv.req_convertor), - &(frag->frag_recv.frag_base.frag_convertor), 1 ); - ompi_convertor_personalize( &(frag->frag_recv.frag_base.frag_convertor), - 0, &offset, mca_ptl_tcp_memalloc, NULL ); - - iov.iov_base = (ompi_iov_base_ptr_t)frag->frag_recv.frag_base.frag_addr; - iov.iov_len = frag->frag_recv.frag_base.frag_size; - iov_count = 1; - max_data = iov.iov_len; - ompi_convertor_unpack( &frag->frag_recv.frag_base.frag_convertor, - &iov, &iov_count, &max_data, &freeAfter ); - } - - /* progress the request */ - frag->frag_recv.frag_base.frag_owner->ptl_recv_progress( - frag->frag_recv.frag_base.frag_owner, - request, - frag->frag_size, - frag->frag_recv.frag_base.frag_size); - - if((frag)->frag_ack_pending == false) { - mca_ptl_tcp_recv_frag_return(frag->frag_recv.frag_base.frag_owner, (frag)); - } - } -} -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/ompi/mca/ptl/tcp/ptl_tcp_sendfrag.c b/ompi/mca/ptl/tcp/ptl_tcp_sendfrag.c deleted file mode 100644 index b720205bae..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_sendfrag.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#include "orte/orte_socket_errno.h" -#include "ompi/types.h" -#include "ompi/datatype/datatype.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/communicator/communicator.h" -#include "ptl_tcp.h" -#include "ptl_tcp_peer.h" -#include "ptl_tcp_proc.h" -#include "ptl_tcp_sendfrag.h" - -#define frag_header frag_send.frag_base.frag_header -#define frag_owner frag_send.frag_base.frag_owner -#define frag_peer frag_send.frag_base.frag_peer -#define frag_convertor frag_send.frag_base.frag_convertor - - -static void mca_ptl_tcp_send_frag_construct(mca_ptl_tcp_send_frag_t* frag); -static void mca_ptl_tcp_send_frag_destruct(mca_ptl_tcp_send_frag_t* frag); - - -opal_class_t mca_ptl_tcp_send_frag_t_class = { - "mca_ptl_tcp_send_frag_t", - OBJ_CLASS(mca_ptl_base_send_frag_t), - (opal_construct_t)mca_ptl_tcp_send_frag_construct, - (opal_destruct_t)mca_ptl_tcp_send_frag_destruct -}; - -/* - * Placeholders for send fragment constructor/destructors. - */ - -static void mca_ptl_tcp_send_frag_construct(mca_ptl_tcp_send_frag_t* frag) -{ - OMPI_DEBUG_ZERO(*frag); - frag->free_after = 0; -} - - -static void mca_ptl_tcp_send_frag_destruct(mca_ptl_tcp_send_frag_t* frag) -{ -} - -extern mca_ptl_tcp_component_t mca_ptl_tcp_component; - -void* mca_ptl_tcp_memalloc( size_t* length, void* data ) -{ - if( (*length) > mca_ptl_tcp_component.tcp_frag_size ) - *length = mca_ptl_tcp_component.tcp_frag_size; - return malloc( *length ); -} - -/* - * Initialize the fragment based on the current offset into the users - * data buffer, and the indicated size. 
- */ - -int mca_ptl_tcp_send_frag_init( - mca_ptl_tcp_send_frag_t* sendfrag, - mca_ptl_base_peer_t* ptl_peer, - mca_ptl_base_send_request_t* sendreq, - size_t offset, - size_t* size, - int flags) -{ - /* message header */ - size_t size_in = *size, size_out, max_data; - uint32_t iov_count; - mca_ptl_base_header_t* hdr = &sendfrag->frag_header; - - sendfrag->free_after = 0; - /* initialize convertor */ - if(size_in > 0) { - ompi_convertor_t *convertor; - int rc; - - convertor = &sendfrag->frag_convertor; - ompi_convertor_clone( &sendreq->req_send.req_convertor, convertor, 1 ); - ompi_convertor_personalize( convertor, 0, &offset, mca_ptl_tcp_memalloc, NULL ); - /* if data is contigous convertor will return an offset - * into users buffer - otherwise will return an allocated buffer - * that holds the packed data - */ - sendfrag->frag_vec[1].iov_base = NULL; - sendfrag->frag_vec[1].iov_len = size_in; - iov_count = 1; - max_data = size_in; - if((rc = ompi_convertor_pack( convertor, &sendfrag->frag_vec[1], - &iov_count, &max_data, &(sendfrag->free_after) )) < 0) { - return OMPI_ERROR; - } - /* adjust the freeAfter as the position zero is reserved for the header */ - sendfrag->free_after <<= 1; - - /* adjust size and request offset to reflect actual number of bytes packed by convertor */ - size_out = sendfrag->frag_vec[1].iov_len; - } else { - size_out = size_in; - } - - if(offset == 0) { - hdr->hdr_common.hdr_type = (flags & MCA_PTL_FLAGS_ACK) ? 
MCA_PTL_HDR_TYPE_MATCH : MCA_PTL_HDR_TYPE_RNDV; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_frag_length = size_out; - hdr->hdr_rndv.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_rndv.hdr_src_ptr.pval = sendfrag; - if(ptl_peer->peer_nbo) { - MCA_PTL_BASE_RNDV_HDR_HTON(hdr->hdr_rndv); - } - } else { - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_frag.hdr_frag_offset = offset; - hdr->hdr_frag.hdr_src_ptr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_frag.hdr_src_ptr.pval = sendfrag; - hdr->hdr_frag.hdr_dst_ptr = sendreq->req_peer_match; - hdr->hdr_frag.hdr_frag_length = size_out; - if(ptl_peer->peer_nbo) { - MCA_PTL_BASE_FRAG_HDR_HTON(hdr->hdr_frag); - } - } - - /* fragment state */ - sendfrag->frag_owner = &ptl_peer->peer_ptl->super; - sendfrag->frag_send.frag_request = sendreq; - sendfrag->frag_send.frag_base.frag_addr = sendfrag->frag_vec[1].iov_base; - sendfrag->frag_send.frag_base.frag_size = size_out; - - sendfrag->frag_peer = ptl_peer; - sendfrag->frag_vec_ptr = sendfrag->frag_vec; - sendfrag->frag_vec_cnt = (size_out == 0) ? 1 : 2; - sendfrag->frag_vec[0].iov_base = (ompi_iov_base_ptr_t)hdr; - sendfrag->frag_vec[0].iov_len = sizeof(mca_ptl_base_header_t); - sendfrag->frag_progressed = 0; - *size = size_out; - return OMPI_SUCCESS; -} - - -/* - * The socket is setup as non-blocking, writes are handled asynchronously, - * with event callbacks when the socket is ready for writes. 
- */ - -bool mca_ptl_tcp_send_frag_handler(mca_ptl_tcp_send_frag_t* frag, int sd) -{ - int cnt=-1; - size_t i, num_vecs; - - /* non-blocking write, but continue if interrupted */ - while(cnt < 0) { - cnt = writev(sd, frag->frag_vec_ptr, frag->frag_vec_cnt); - if(cnt < 0) { - switch(ompi_socket_errno) { - case EINTR: - continue; - case EWOULDBLOCK: - /* opal_output(0, "mca_ptl_tcp_send_frag_handler: EWOULDBLOCK\n"); */ - return false; - case EFAULT: - opal_output( 0, "mca_ptl_tcp_send_frag_handler: writev error (%p, %d)\n\t%s(%d)\n", - frag->frag_vec_ptr[0].iov_base, frag->frag_vec_ptr[0].iov_len, - strerror(ompi_socket_errno), frag->frag_vec_cnt ); - default: - { - opal_output(0, "mca_ptl_tcp_send_frag_handler: writev failed with errno=%d", ompi_socket_errno); - mca_ptl_tcp_peer_close(frag->frag_peer); - return false; - } - } - } - } - -#if MCA_PTL_TCP_STATISTICS - ((mca_ptl_tcp_module_t*)frag->frag_owner)->ptl_bytes_sent += cnt; - ((mca_ptl_tcp_module_t*)frag->frag_owner)->ptl_send_handler++; -#endif - - /* if the write didn't complete - update the iovec state */ - num_vecs = frag->frag_vec_cnt; - for(i=0; i= (int)frag->frag_vec_ptr->iov_len) { - cnt -= frag->frag_vec_ptr->iov_len; - if( frag->free_after & 1 ) { - free( frag->frag_saved_vec.iov_base ); - } - frag->frag_vec_ptr++; - frag->frag_vec_cnt--; - frag->frag_saved_vec = *frag->frag_vec_ptr; - frag->free_after >>= 1; - } else { - frag->frag_vec_ptr->iov_base = (ompi_iov_base_ptr_t) - (((unsigned char*)frag->frag_vec_ptr->iov_base) + cnt); - frag->frag_vec_ptr->iov_len -= cnt; - break; - } - } - return (frag->frag_vec_cnt == 0); -} - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_sendfrag.h b/ompi/mca/ptl/tcp/ptl_tcp_sendfrag.h deleted file mode 100644 index 9a1078440a..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_sendfrag.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PTL_TCP_SEND_FRAG_H -#define MCA_PTL_TCP_SEND_FRAG_H - -#include "ompi_config.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include "opal/sys/atomic.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ompi/mca/ptl/base/ptl_base_sendfrag.h" -#include "ptl_tcp.h" -#include "ptl_tcp_recvfrag.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -extern opal_class_t mca_ptl_tcp_send_frag_t_class; -struct mca_ptl_base_peer_t; - - -/** - * TCP send fragment derived type. - */ -struct mca_ptl_tcp_send_frag_t { - mca_ptl_base_send_frag_t frag_send; /**< base send fragment descriptor */ - int32_t free_after; /**< keep trace of which vectors we have to free */ - struct iovec *frag_vec_ptr; /**< pointer into iovec array */ - size_t frag_vec_cnt; /**< number of iovec structs left to process */ - struct iovec frag_vec[2]; /**< array of iovecs for send */ - struct iovec frag_saved_vec; /**< save the initial values from the current iovec */ - volatile int frag_progressed; /**< for threaded case - has request status been updated */ -}; -typedef struct mca_ptl_tcp_send_frag_t mca_ptl_tcp_send_frag_t; - - -#define MCA_PTL_TCP_SEND_FRAG_ALLOC(item, rc) \ - OMPI_FREE_LIST_GET(&mca_ptl_tcp_component.tcp_send_frags, item, rc); - - -bool mca_ptl_tcp_send_frag_handler(mca_ptl_tcp_send_frag_t*, int sd); - - -/** - * Initialize a fragment descriptor. 
- * - * frag (IN) Fragment - * peer (IN) PTL peer addressing information - * request (IN) Send request - * offset (IN) Current offset into packed buffer - * size (IN/OUT) Requested size / actual size returned - * flags (IN) - */ - -int mca_ptl_tcp_send_frag_init( - mca_ptl_tcp_send_frag_t*, - struct mca_ptl_base_peer_t*, - struct mca_ptl_base_send_request_t*, - size_t offset, - size_t* size, - int flags); - - -/** - * For fragments that require an acknowledgment, this routine will be called - * twice, once when the send completes, and again when the acknowledgment is - * returned. Only the last caller should update the request status, so we - * add a lock w/ the frag_progressed flag. - */ - -static inline void mca_ptl_tcp_send_frag_progress(mca_ptl_tcp_send_frag_t* frag) -{ - mca_ptl_base_send_request_t* request = frag->frag_send.frag_request; - bool frag_ack; - - /* if this is an ack - simply return to pool */ - if(request == NULL) { - mca_ptl_tcp_send_frag_return(frag->frag_send.frag_base.frag_owner, frag); - return; - } - - /* Done when: - * (1) ack is not required and send completes - * (2) ack is received and send has completed - */ - frag_ack = (frag->frag_send.frag_base.frag_header. - hdr_common.hdr_flags & MCA_PTL_FLAGS_ACK) ? 
true : false; - if(frag_ack == false || opal_atomic_add_32(&frag->frag_progressed,1) == 2) { - - /* update request status */ - frag->frag_send.frag_base.frag_owner->ptl_send_progress( - frag->frag_send.frag_base.frag_owner, - request, - frag->frag_send.frag_base.frag_size); - - /* the first fragment is allocated with the request, - * all others need to be returned to free list - */ - if (request->req_cached == false || - frag->frag_send.frag_base.frag_header.hdr_common.hdr_type == MCA_PTL_HDR_TYPE_FRAG) { - mca_ptl_tcp_send_frag_return(frag->frag_send.frag_base.frag_owner, frag); - } - } -} - - -static inline void mca_ptl_tcp_send_frag_init_ack( - mca_ptl_tcp_send_frag_t* ack, - struct mca_ptl_base_module_t* ptl, - struct mca_ptl_base_peer_t* ptl_peer, - mca_ptl_tcp_recv_frag_t* frag) -{ - mca_ptl_base_header_t* hdr = &ack->frag_send.frag_base.frag_header; - mca_ptl_base_recv_request_t* request = frag->frag_recv.frag_request; - hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_ACK; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_ack.hdr_src_ptr = frag->frag_recv.frag_base.frag_header.hdr_rndv.hdr_src_ptr; - hdr->hdr_ack.hdr_dst_match.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_ack.hdr_dst_match.pval = request; - hdr->hdr_ack.hdr_dst_addr.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */ - hdr->hdr_ack.hdr_dst_addr.pval = request->req_recv.req_base.req_addr; - hdr->hdr_ack.hdr_dst_size = request->req_recv.req_bytes_packed; - ack->frag_send.frag_request = 0; - ack->frag_send.frag_base.frag_peer = ptl_peer; - ack->frag_send.frag_base.frag_owner = ptl; - ack->frag_send.frag_base.frag_addr = NULL; - ack->frag_send.frag_base.frag_size = 0; - ack->frag_vec_ptr = ack->frag_vec; - ack->frag_vec[0].iov_base = (ompi_iov_base_ptr_t)hdr; - ack->frag_vec[0].iov_len = sizeof(mca_ptl_base_header_t); - ack->frag_vec_cnt = 1; - ack->free_after = 0; -} - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git 
a/ompi/mca/ptl/tcp/ptl_tcp_sendreq.c b/ompi/mca/ptl/tcp/ptl_tcp_sendreq.c deleted file mode 100644 index 3a09365bd8..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_sendreq.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "ompi_config.h" -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#include "orte/orte_socket_errno.h" -#include "ompi/types.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ptl_tcp.h" -#include "ptl_tcp_sendreq.h" - - -static void mca_ptl_tcp_send_request_construct(mca_ptl_tcp_send_request_t*); -static void mca_ptl_tcp_send_request_destruct(mca_ptl_tcp_send_request_t*); - - -opal_class_t mca_ptl_tcp_send_request_t_class = { - "mca_ptl_tcp_send_request_t", - OBJ_CLASS(mca_ptl_base_send_request_t), - (opal_construct_t)mca_ptl_tcp_send_request_construct, - (opal_destruct_t)mca_ptl_tcp_send_request_destruct -}; - - -void mca_ptl_tcp_send_request_construct(mca_ptl_tcp_send_request_t* request) -{ - OBJ_CONSTRUCT(&request->req_frag, mca_ptl_tcp_send_frag_t); -} - - -void mca_ptl_tcp_send_request_destruct(mca_ptl_tcp_send_request_t* request) -{ - OBJ_DESTRUCT(&request->req_frag); -} - diff --git a/ompi/mca/ptl/tcp/ptl_tcp_sendreq.h b/ompi/mca/ptl/tcp/ptl_tcp_sendreq.h deleted file mode 100644 index efff4febe9..0000000000 --- a/ompi/mca/ptl/tcp/ptl_tcp_sendreq.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees 
of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PTL_TCP_SEND_REQUEST_H -#define MCA_PTL_TCP_SEND_REQUEST_H - -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include "ompi_config.h" -#include "ompi/mca/ptl/base/ptl_base_sendreq.h" -#include "ptl_tcp_sendfrag.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -extern opal_class_t mca_ptl_tcp_send_request_t_class; -/** - * TCP send request derived type. The send request contains both the - * base send request, and space for the first TCP send fragment descriptor. - * This avoids the overhead of a second allocation for the initial send - * fragment on every send request. - */ -struct mca_ptl_tcp_send_request_t { - mca_ptl_base_send_request_t super; - mca_ptl_tcp_send_frag_t req_frag; /* first fragment */ -}; -typedef struct mca_ptl_tcp_send_request_t mca_ptl_tcp_send_request_t; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif -