diff --git a/ompi/mca/pml/csum/Makefile.am b/ompi/mca/pml/csum/Makefile.am deleted file mode 100644 index 3cac38d6ae..0000000000 --- a/ompi/mca/pml/csum/Makefile.am +++ /dev/null @@ -1,66 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_pkgdata_DATA = \ - help-mpi-pml-csum.txt - -EXTRA_DIST = post_configure.sh pml_csum_endpoint.c pml_csum_endpoint.h - -csum_sources = \ - pml_csum.c \ - pml_csum.h \ - pml_csum_comm.c \ - pml_csum_comm.h \ - pml_csum_component.c \ - pml_csum_component.h \ - pml_csum_hdr.h \ - pml_csum_iprobe.c \ - pml_csum_irecv.c \ - pml_csum_isend.c \ - pml_csum_progress.c \ - pml_csum_rdma.c \ - pml_csum_rdma.h \ - pml_csum_rdmafrag.c \ - pml_csum_rdmafrag.h \ - pml_csum_recvfrag.c \ - pml_csum_recvfrag.h \ - pml_csum_recvreq.c \ - pml_csum_recvreq.h \ - pml_csum_sendreq.c \ - pml_csum_sendreq.h \ - pml_csum_start.c - -if MCA_BUILD_ompi_pml_csum_DSO -component_noinst = -component_install = mca_pml_csum.la -else -component_noinst = libmca_pml_csum.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pml_csum_la_SOURCES = $(csum_sources) -mca_pml_csum_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pml_csum_la_SOURCES = $(csum_sources) -libmca_pml_csum_la_LDFLAGS = -module -avoid-version diff --git a/ompi/mca/pml/csum/help-mpi-pml-csum.txt b/ompi/mca/pml/csum/help-mpi-pml-csum.txt deleted file mode 100644 index ed378d5003..0000000000 --- a/ompi/mca/pml/csum/help-mpi-pml-csum.txt +++ /dev/null @@ -1,20 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -[eager_limit_too_small] -The "eager limit" MCA parameter in the %s BTL was set to a value which -is too low for Open MPI to function properly. Please re-run your job -with a higher eager limit value for this BTL; the exact MCA parameter -name and its corresponding minimum value is shown below. - - Local host: %s - BTL name: %s - BTL eager limit value: %d (set via btl_%s_eager_limit) - BTL eager limit minimum: %d - MCA parameter name: btl_%s_eager_limit diff --git a/ompi/mca/pml/csum/pml_csum.c b/ompi/mca/pml/csum/pml_csum.c deleted file mode 100644 index 718cf43598..0000000000 --- a/ompi/mca/pml/csum/pml_csum.c +++ /dev/null @@ -1,903 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. 
- * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2006-2008 University of Houston. All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved - * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include -#include - -#include "opal/class/opal_bitmap.h" -#include "opal/util/crc.h" -#include "opal/util/output.h" -#include "opal/util/show_help.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/btl/btl.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/mca/btl/base/base.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/runtime/ompi_cr.h" - -#include "pml_csum.h" -#include "pml_csum_component.h" -#include "pml_csum_comm.h" -#include "pml_csum_hdr.h" -#include "pml_csum_recvfrag.h" -#include "pml_csum_sendreq.h" -#include "pml_csum_recvreq.h" -#include "pml_csum_rdmafrag.h" - -mca_pml_csum_t mca_pml_csum = { - { - mca_pml_csum_add_procs, - mca_pml_csum_del_procs, - mca_pml_csum_enable, - mca_pml_csum_progress, - mca_pml_csum_add_comm, - mca_pml_csum_del_comm, - mca_pml_csum_irecv_init, - mca_pml_csum_irecv, - mca_pml_csum_recv, - mca_pml_csum_isend_init, - mca_pml_csum_isend, - mca_pml_csum_send, - mca_pml_csum_iprobe, - mca_pml_csum_probe, - mca_pml_csum_start, - mca_pml_csum_improbe, - mca_pml_csum_mprobe, - mca_pml_csum_imrecv, - mca_pml_csum_mrecv, - mca_pml_csum_dump, - mca_pml_csum_ft_event, - 65535, - INT_MAX - } -}; - - -void mca_pml_csum_error_handler( struct mca_btl_base_module_t* btl, - int32_t flags, ompi_proc_t* errproc, - char* btlinfo ); - -int mca_pml_csum_enable(bool enable) -{ - if( false == enable ) { - return OMPI_SUCCESS; - } - - OBJ_CONSTRUCT(&mca_pml_csum.lock, opal_mutex_t); - - /* fragments */ - OBJ_CONSTRUCT(&mca_pml_csum.rdma_frags, ompi_free_list_t); - ompi_free_list_init_new( &mca_pml_csum.rdma_frags, - sizeof(mca_pml_csum_rdma_frag_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_csum_rdma_frag_t), - 0,opal_cache_line_size, - mca_pml_csum.free_list_num, - mca_pml_csum.free_list_max, - mca_pml_csum.free_list_inc, - NULL ); - - OBJ_CONSTRUCT(&mca_pml_csum.recv_frags, ompi_free_list_t); - - ompi_free_list_init_new( &mca_pml_csum.recv_frags, - sizeof(mca_pml_csum_recv_frag_t) + mca_pml_csum.unexpected_limit, - opal_cache_line_size, - OBJ_CLASS(mca_pml_csum_recv_frag_t), - 0,opal_cache_line_size, - mca_pml_csum.free_list_num, - mca_pml_csum.free_list_max, - mca_pml_csum.free_list_inc, - NULL ); - - OBJ_CONSTRUCT(&mca_pml_csum.pending_pckts, ompi_free_list_t); - ompi_free_list_init_new( &mca_pml_csum.pending_pckts, - sizeof(mca_pml_csum_pckt_pending_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_csum_pckt_pending_t), - 0,opal_cache_line_size, - mca_pml_csum.free_list_num, - mca_pml_csum.free_list_max, - mca_pml_csum.free_list_inc, - NULL ); - - - OBJ_CONSTRUCT(&mca_pml_csum.buffers, ompi_free_list_t); - OBJ_CONSTRUCT(&mca_pml_csum.send_ranges, ompi_free_list_t); - ompi_free_list_init_new( &mca_pml_csum.send_ranges, - sizeof(mca_pml_csum_send_range_t) + - (mca_pml_csum.max_send_per_range - 1) * sizeof(mca_pml_csum_com_btl_t), - opal_cache_line_size, - 
OBJ_CLASS(mca_pml_csum_send_range_t), - 0,opal_cache_line_size, - mca_pml_csum.free_list_num, - mca_pml_csum.free_list_max, - mca_pml_csum.free_list_inc, - NULL ); - - /* pending operations */ - OBJ_CONSTRUCT(&mca_pml_csum.send_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_csum.recv_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_csum.pckt_pending, opal_list_t); - OBJ_CONSTRUCT(&mca_pml_csum.rdma_pending, opal_list_t); - /* missing communicator pending list */ - OBJ_CONSTRUCT(&mca_pml_csum.non_existing_communicator_pending, opal_list_t); - - /** - * If we get here this is the PML who get selected for the run. We - * should get ownership for the send and receive requests list, and - * initialize them with the size of our own requests. - */ - ompi_free_list_init_new( &mca_pml_base_send_requests, - sizeof(mca_pml_csum_send_request_t) + - (mca_pml_csum.max_rdma_per_request - 1) * - sizeof(mca_pml_csum_com_btl_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_csum_send_request_t), - 0,opal_cache_line_size, - mca_pml_csum.free_list_num, - mca_pml_csum.free_list_max, - mca_pml_csum.free_list_inc, - NULL ); - - ompi_free_list_init_new( &mca_pml_base_recv_requests, - sizeof(mca_pml_csum_recv_request_t) + - (mca_pml_csum.max_rdma_per_request - 1) * - sizeof(mca_pml_csum_com_btl_t), - opal_cache_line_size, - OBJ_CLASS(mca_pml_csum_recv_request_t), - 0,opal_cache_line_size, - mca_pml_csum.free_list_num, - mca_pml_csum.free_list_max, - mca_pml_csum.free_list_inc, - NULL ); - - mca_pml_csum.enabled = true; - return OMPI_SUCCESS; -} - -int mca_pml_csum_add_comm(ompi_communicator_t* comm) -{ - /* allocate pml specific comm data */ - mca_pml_csum_comm_t* pml_comm = OBJ_NEW(mca_pml_csum_comm_t); - opal_list_item_t *item, *next_item; - mca_pml_csum_recv_frag_t* frag; - mca_pml_csum_comm_proc_t* pml_proc; - mca_pml_csum_match_hdr_t* hdr; - int i; - - if (NULL == pml_comm) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* should never happen, but it was, so check */ - if (comm->c_contextid > mca_pml_csum.super.pml_max_contextid) { - OBJ_RELEASE(pml_comm); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - mca_pml_csum_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count); - comm->c_pml_comm = pml_comm; - - for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) { - pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i); - OBJ_RETAIN(pml_comm->procs[i].ompi_proc); - } - /* Grab all related messages from the non_existing_communicator pending queue */ - for( item = opal_list_get_first(&mca_pml_csum.non_existing_communicator_pending); - item != opal_list_get_end(&mca_pml_csum.non_existing_communicator_pending); - item = next_item ) { - frag = (mca_pml_csum_recv_frag_t*)item; - next_item = opal_list_get_next(item); - hdr = &frag->hdr.hdr_match; - - /* Is this fragment for the current communicator ? */ - if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid ) - continue; - - /* As we now know we work on a fragment for this communicator - * we should remove it from the - * non_existing_communicator_pending list. */ - opal_list_remove_item( &mca_pml_csum.non_existing_communicator_pending, - item ); - - add_fragment_to_unexpected: - - /* We generate the MSG_ARRIVED event as soon as the PML is aware - * of a matching fragment arrival. Independing if it is received - * on the correct order or not. This will allow the tools to - * figure out if the messages are not received in the correct - * order (if multiple network interfaces). 
- */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* There is no matching to be done, and no lock to be held on the communicator as - * we know at this point that the communicator has not yet been returned to the user. - * The only required protection is around the non_existing_communicator_pending queue. - * We just have to push the fragment into the unexpected list of the corresponding - * proc, or into the out-of-order (cant_match) list. - */ - pml_proc = &(pml_comm->procs[hdr->hdr_src]); - - if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) { - /* We're now expecting the next sequence number. */ - pml_proc->expected_sequence++; - opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag ); - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - /* And now the ugly part. As some fragments can be inserted in the cant_match list, - * every time we succesfully add a fragment in the unexpected list we have to make - * sure the next one is not in the cant_match. Otherwise, we will endup in a deadlock - * situation as the cant_match is only checked when a new fragment is received from - * the network. - */ - for(frag = (mca_pml_csum_recv_frag_t *)opal_list_get_first(&pml_proc->frags_cant_match); - frag != (mca_pml_csum_recv_frag_t *)opal_list_get_end(&pml_proc->frags_cant_match); - frag = (mca_pml_csum_recv_frag_t *)opal_list_get_next(frag)) { - hdr = &frag->hdr.hdr_match; - /* If the message has the next expected seq from that proc... */ - if(hdr->hdr_seq != pml_proc->expected_sequence) - continue; - - opal_list_remove_item(&pml_proc->frags_cant_match, (opal_list_item_t*)frag); - goto add_fragment_to_unexpected; - } - } else { - opal_list_append( &pml_proc->frags_cant_match, (opal_list_item_t*)frag ); - } - } - return OMPI_SUCCESS; -} - -int mca_pml_csum_del_comm(ompi_communicator_t* comm) -{ - mca_pml_csum_comm_t* pml_comm = comm->c_pml_comm; - int i; - - for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) { - OBJ_RELEASE(pml_comm->procs[i].ompi_proc); - } - OBJ_RELEASE(comm->c_pml_comm); - comm->c_pml_comm = NULL; - return OMPI_SUCCESS; -} - - -/* - * For each proc setup a datastructure that indicates the BTLs - * that can be used to reach the destination. 
- * - */ - -int mca_pml_csum_add_procs(ompi_proc_t** procs, size_t nprocs) -{ - opal_bitmap_t reachable; - int rc; - size_t i; - opal_list_item_t *item; - opal_convertor_t *local_convertor; - - if(nprocs == 0) - return OMPI_SUCCESS; - /* Create a convertor for processes on the same node & - disable checksum computation for local communication */ - local_convertor = opal_convertor_create(ompi_proc_local()->proc_arch, 0); - local_convertor->flags &= ~CONVERTOR_WITH_CHECKSUM; - - for (i = 0 ; i < nprocs ; ++i) { - /* we don't have any endpoint data we need to cache on the - ompi_proc_t, so set proc_pml to NULL */ - procs[i]->proc_pml = NULL; - /* if the proc isn't local, tell the convertor to - * checksum the data - */ - if (!OPAL_PROC_ON_LOCAL_NODE(procs[i]->proc_flags)) { - procs[i]->proc_convertor->flags |= CONVERTOR_WITH_CHECKSUM; - } else { - OBJ_RELEASE(procs[i]->proc_convertor); - procs[i]->proc_convertor = local_convertor; - OBJ_RETAIN(local_convertor); - } - } - /* Decrement reference count by one, as we increment it twice for ourselves */ - OBJ_RELEASE(local_convertor); - - OBJ_CONSTRUCT(&reachable, opal_bitmap_t); - rc = opal_bitmap_init(&reachable, (int)nprocs); - if(OMPI_SUCCESS != rc) - return rc; - - /* - * JJH: Disable this in FT enabled builds since - * we use a wrapper PML. It will cause this check to - * return failure as all processes will return the wrapper PML - * component in use instead of the wrapped PML component underneath. - */ -#if OPAL_ENABLE_FT_CR == 0 - /* make sure remote procs are using the same PML as us */ - if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("csum", - procs, - nprocs))) { - return rc; - } -#endif - - rc = mca_bml.bml_add_procs( nprocs, - procs, - &reachable ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - /* Check that values supplied by all initialized btls will work - for us. Note that this is the list of all initialized BTLs, - not the ones used for the just added procs. This is a little - overkill and inaccurate, as we may end up not using the BTL in - question and all add_procs calls after the first one are - duplicating an already completed check. But the final - initialization of the PML occurs before the final - initialization of the BTLs, and iterating through the in-use - BTLs requires iterating over the procs, as the BML does not - expose all currently in use btls. 
*/ - - for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ; - item != opal_list_get_end(&mca_btl_base_modules_initialized) ; - item = opal_list_get_next(item)) { - mca_btl_base_selected_module_t *sm = - (mca_btl_base_selected_module_t*) item; - if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_csum_hdr_t)) { - opal_show_help("help-mpi-pml-csum.txt", "eager_limit_too_small", - true, - sm->btl_component->btl_version.mca_component_name, - ompi_process_info.nodename, - sm->btl_component->btl_version.mca_component_name, - sm->btl_module->btl_eager_limit, - sm->btl_component->btl_version.mca_component_name, - sizeof(mca_pml_csum_hdr_t), - sm->btl_component->btl_version.mca_component_name); - rc = OMPI_ERR_BAD_PARAM; - goto cleanup_and_return; - } - } - - - /* TODO: Move these callback registration to another place */ - rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_MATCH, - mca_pml_csum_recv_frag_callback_match, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_RNDV, - mca_pml_csum_recv_frag_callback_rndv, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_RGET, - mca_pml_csum_recv_frag_callback_rget, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_ACK, - mca_pml_csum_recv_frag_callback_ack, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_FRAG, - mca_pml_csum_recv_frag_callback_frag, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_PUT, - mca_pml_csum_recv_frag_callback_put, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_FIN, - mca_pml_csum_recv_frag_callback_fin, - NULL ); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - /* register error handlers */ - rc = mca_bml.bml_register_error(mca_pml_csum_error_handler); - if(OMPI_SUCCESS != rc) - goto cleanup_and_return; - - cleanup_and_return: - OBJ_DESTRUCT(&reachable); - - return rc; -} - -/* - * iterate through each proc and notify any PTLs associated - * with the proc that it is/has gone away - */ - -int mca_pml_csum_del_procs(ompi_proc_t** procs, size_t nprocs) -{ - return mca_bml.bml_del_procs(nprocs, procs); -} - -/* - * diagnostics - */ - -int mca_pml_csum_dump(struct ompi_communicator_t* comm, int verbose) -{ - struct mca_pml_comm_t* pml_comm = comm->c_pml_comm; - int i; - - /* iterate through all procs on communicator */ - for( i = 0; i < (int)pml_comm->num_procs; i++ ) { - mca_pml_csum_comm_proc_t* proc = &pml_comm->procs[i]; - mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_bml; - size_t n; - - opal_output(0, "[Rank %d]\n", i); - /* dump all receive queues */ - - /* dump all btls */ - for(n=0; nbtl_eager.arr_size; n++) { - mca_bml_base_btl_t* bml_btl = &ep->btl_eager.bml_btls[n]; - bml_btl->btl->btl_dump(bml_btl->btl, bml_btl->btl_endpoint, verbose); - } - } - return OMPI_SUCCESS; -} - -static void mca_pml_csum_fin_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check for pending requests */ - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - -/** - * Send an FIN to the peer. 
If we fail to send this ack (no more available - * fragments or the send failed) this function automatically add the FIN - * to the list of pending FIN, Which guarantee that the FIN will be sent - * later. - */ -int mca_pml_csum_send_fin( ompi_proc_t* proc, - mca_bml_base_btl_t* bml_btl, - ompi_ptr_t hdr_des, - uint8_t order, - uint32_t status ) -{ - mca_btl_base_descriptor_t* fin; - mca_pml_csum_fin_hdr_t* hdr; - int rc; - - mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_csum_fin_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - - if(NULL == fin) { - MCA_PML_CSUM_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status); - return OMPI_ERR_OUT_OF_RESOURCE; - } - fin->des_cbfunc = mca_pml_csum_fin_completion; - fin->des_cbdata = NULL; - - /* fill in header */ - hdr = (mca_pml_csum_fin_hdr_t*)fin->des_src->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_FIN; - hdr->hdr_common.hdr_csum = 0; - hdr->hdr_des = hdr_des; - hdr->hdr_fail = status; - - hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t)); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s: Sending \'FIN\' with header csum:0x%04x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_common.hdr_csum)); - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_FIN, proc); - - /* queue request */ - rc = mca_bml_base_send( bml_btl, - fin, - MCA_PML_CSUM_HDR_TYPE_FIN ); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, fin); - MCA_PML_CSUM_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status); - return OMPI_ERR_OUT_OF_RESOURCE; -} - -void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl) -{ - mca_pml_csum_pckt_pending_t *pckt; - int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_csum.pckt_pending); - - for(i = 0; i < s; i++) { - mca_bml_base_btl_t *send_dst = NULL; - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - pckt = (mca_pml_csum_pckt_pending_t*) - opal_list_remove_first(&mca_pml_csum.pckt_pending); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - if(NULL == pckt) - break; - if(pckt->bml_btl != NULL && - pckt->bml_btl->btl == bml_btl->btl) { - send_dst = pckt->bml_btl; - } else { - send_dst = mca_bml_base_btl_array_find( - &pckt->proc->proc_bml->btl_eager, bml_btl->btl); - } - if(NULL == send_dst) { - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - opal_list_append(&mca_pml_csum.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - continue; - } - - switch(pckt->hdr.hdr_common.hdr_type) { - case MCA_PML_CSUM_HDR_TYPE_ACK: - rc = mca_pml_csum_recv_request_ack_send_btl(pckt->proc, - send_dst, - pckt->hdr.hdr_ack.hdr_src_req.lval, - pckt->hdr.hdr_ack.hdr_dst_req.pval, - pckt->hdr.hdr_ack.hdr_send_offset, - pckt->hdr.hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA); - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - opal_list_append(&mca_pml_csum.pckt_pending, - (opal_list_item_t*)pckt); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - return; - } - break; - case MCA_PML_CSUM_HDR_TYPE_FIN: - rc = mca_pml_csum_send_fin(pckt->proc, send_dst, - pckt->hdr.hdr_fin.hdr_des, - pckt->order, - pckt->hdr.hdr_fin.hdr_fail); - if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) { - return; - } - break; - default: - opal_output(0, "[%s:%d] wrong header type\n", - __FILE__, __LINE__); - break; - } - /* We're done with this packet, return it back to the free 
list */ - MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt); - } -} - -void mca_pml_csum_process_pending_rdma(void) -{ - mca_pml_csum_rdma_frag_t* frag; - int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_csum.rdma_pending); - - for(i = 0; i < s; i++) { - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - frag = (mca_pml_csum_rdma_frag_t*) - opal_list_remove_first(&mca_pml_csum.rdma_pending); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - if(NULL == frag) - break; - if(frag->rdma_state == MCA_PML_CSUM_RDMA_PUT) { - frag->retries++; - rc = mca_pml_csum_send_request_put_frag(frag); - } else { - rc = mca_pml_csum_recv_request_get_frag(frag); - } - if(OMPI_ERR_OUT_OF_RESOURCE == rc) - break; - } -} - - -void mca_pml_csum_error_handler( - struct mca_btl_base_module_t* btl, int32_t flags, - ompi_proc_t* errproc, char* btlinfo ) { - ompi_rte_abort(-1, NULL); -} - -#if OPAL_ENABLE_FT_CR == 0 -int mca_pml_csum_ft_event( int state ) { - return OMPI_SUCCESS; -} -#else -int mca_pml_csum_ft_event( int state ) -{ - static bool first_continue_pass = false; - ompi_proc_t** procs = NULL; - size_t num_procs; - int ret, p; - ompi_rte_collective_t *coll, *modex; - - coll = OBJ_NEW(ompi_rte_collective_t); - coll->id = ompi_process_info.peer_init_barrier; - if(OPAL_CRS_CHECKPOINT == state) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1); - ompi_rte_barrier(coll); - ORTE_WAIT_FOR_COMPLETION(coll->active); - } - - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0); - } - else if(OPAL_CRS_CONTINUE == state) { - first_continue_pass = !first_continue_pass; - - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0); - ompi_rte_barrier(coll); - ORTE_WAIT_FOR_COMPLETION(coll->active); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2); - } - - if( orte_cr_continue_like_restart && !first_continue_pass ) { - /* - * Get a list of processes - */ - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto clean; - } - - /* - * Refresh the proc structure, and publish our proc info in the modex. - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. For our - * needs here we only need to fix up the modex, bml and pml - * references. - */ - if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { - opal_output(0, - "pml:csum: ft_event(Restart): proc_refresh Failed %d", - ret); - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free (procs); - goto clean; - } - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - /* - * Get a list of processes - */ - procs = ompi_proc_all(&num_procs); - if(NULL == procs) { - ret = OMPI_ERR_OUT_OF_RESOURCE; - goto clean; - } - - /* - * Clean out the modex information since it is invalid now. - * ompi_rte_purge_proc_attrs(); - * This happens at the ORTE level, so doing it again here will cause - * some issues with socket caching. - */ - - - /* - * Refresh the proc structure, and publish our proc info in the modex. - * NOTE: Do *not* call ompi_proc_finalize as there are many places in - * the code that point to indv. procs in this strucutre. For our - * needs here we only need to fix up the modex, bml and pml - * references. 
- */ - if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) { - opal_output(0, - "pml:csum: ft_event(Restart): proc_refresh Failed %d", - ret); - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free (procs); - goto clean; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - /* Call the BML - * BML is expected to call ft_event in - * - BTL(s) - * - MPool(s) - */ - if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) { - opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n", - ret); - } - - if(OPAL_CRS_CHECKPOINT == state) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1); - - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0); - /* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/ - } - } - else if(OPAL_CRS_CONTINUE == state) { - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1); - ompi_rte_barrier(coll); - ORTE_WAIT_FOR_COMPLETION(coll->active); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3); - } - - if( orte_cr_continue_like_restart && !first_continue_pass ) { - /* - * Exchange the modex information once again. - * BTLs will have republished their modex information. - */ - modex = OBJ_NEW(ompi_rte_collective_t); - modex->id = ompi_process_info.peer_modex; - if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) { - opal_output(0, - "pml:csum: ft_event(Restart): Failed orte_grpcomm.modex() = %d", - ret); - OBJ_RELEASE(modex); - goto clean; - } - ORTE_WAIT_FOR_COMPLETION(modex->active); - OBJ_RELEASE(modex); - - /* - * Startup the PML stack now that the modex is running again - * Add the new procs (BTLs redo modex recv's) - */ - if( OMPI_SUCCESS != (ret = mca_pml_csum_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:csum: ft_event(Restart): Failed in add_procs (%d)", ret); - goto clean; - } - - /* Is this barrier necessary ? JJH */ - if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) { - opal_output(0, "pml:csum: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret); - goto clean; - } - ORTE_WAIT_FOR_COMPLETION(coll->active); - - if( NULL != procs ) { - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free(procs); - procs = NULL; - } - } - if( !first_continue_pass ) { - if( opal_cr_timing_barrier_enabled ) { - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2); - ompi_rte_barrier(coll); - ORTE_WAIT_FOR_COMPLETION(coll->active); - } - OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1); - } - } - else if(OPAL_CRS_RESTART_PRE == state ) { - /* Nothing here */ - } - else if(OPAL_CRS_RESTART == state ) { - /* - * Exchange the modex information once again. - * BTLs will have republished their modex information. - */ - modex = OBJ_NEW(ompi_rte_collective_t); - modex->id = ompi_process_info.peer_modex; - if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(NULL))) { - opal_output(0, - "pml:csum: ft_event(Restart): Failed orte_grpcomm.modex() = %d", - ret); - OBJ_RELEASE(modex); - goto clean; - } - ORTE_WAIT_FOR_COMPLETION(modex->active); - OBJ_RELEASE(modex); - - /* - * Startup the PML stack now that the modex is running again - * Add the new procs (BTLs redo modex recv's) - */ - if( OMPI_SUCCESS != (ret = mca_pml_csum_add_procs(procs, num_procs) ) ) { - opal_output(0, "pml:csum: ft_event(Restart): Failed in add_procs (%d)", ret); - goto clean; - } - - /* Is this barrier necessary ? 
JJH */ - if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) { - opal_output(0, "pml:csum: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret); - goto clean; - } - ORTE_WAIT_FOR_COMPLETION(coll->active); - - if( NULL != procs ) { - for(p = 0; p < (int)num_procs; ++p) { - OBJ_RELEASE(procs[p]); - } - free(procs); - procs = NULL; - } - } - else if(OPAL_CRS_TERM == state ) { - ; - } - else { - ; - } - - ret = OMPI_SUCCESS; - -clean: - OBJ_RELEASE(coll); - return ret; -} -#endif /* OPAL_ENABLE_FT_CR */ - -int mca_pml_csum_com_btl_comp(const void *v1, const void *v2) -{ - const mca_pml_csum_com_btl_t *b1 = (const mca_pml_csum_com_btl_t *) v1; - const mca_pml_csum_com_btl_t *b2 = (const mca_pml_csum_com_btl_t *) v2; - - if(b1->bml_btl->btl_weight < b2->bml_btl->btl_weight) - return 1; - if(b1->bml_btl->btl_weight > b2->bml_btl->btl_weight) - return -1; - - return 0; -} - diff --git a/ompi/mca/pml/csum/pml_csum.h b/ompi/mca/pml/csum/pml_csum.h deleted file mode 100644 index 885569447a..0000000000 --- a/ompi/mca/pml/csum/pml_csum.h +++ /dev/null @@ -1,361 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_CSUM_H -#define MCA_PML_CSUM_H - -#include "ompi_config.h" -#include "ompi/class/ompi_free_list.h" -#include "ompi/request/request.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/pml_base_request.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/datatype/ompi_datatype.h" -#include "pml_csum_hdr.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/allocator/base/base.h" - -BEGIN_C_DECLS - -/** - * CSUM PML module - */ - -struct mca_pml_csum_t { - mca_pml_base_module_t super; - - int priority; - int free_list_num; /* initial size of free list */ - int free_list_max; /* maximum size of free list */ - int free_list_inc; /* number of elements to grow free list */ - size_t send_pipeline_depth; - size_t recv_pipeline_depth; - size_t rdma_put_retries_limit; - int max_rdma_per_request; - int max_send_per_range; - bool leave_pinned; - int leave_pinned_pipeline; - - /* lock queue access */ - opal_mutex_t lock; - - /* free lists */ - ompi_free_list_t rdma_frags; - ompi_free_list_t recv_frags; - ompi_free_list_t pending_pckts; - ompi_free_list_t buffers; - ompi_free_list_t send_ranges; - - /* list of pending operations */ - opal_list_t pckt_pending; - opal_list_t send_pending; - opal_list_t recv_pending; - opal_list_t rdma_pending; - /* List of pending fragments without a matching communicator */ - opal_list_t non_existing_communicator_pending; - bool enabled; - char* allocator_name; - mca_allocator_base_module_t* allocator; - uint32_t unexpected_limit; -}; -typedef struct mca_pml_csum_t mca_pml_csum_t; - -extern mca_pml_csum_t mca_pml_csum; -extern int mca_pml_csum_output; - -/* - * PML interface functions. 
- */ - -extern int mca_pml_csum_add_comm( - struct ompi_communicator_t* comm -); - -extern int mca_pml_csum_del_comm( - struct ompi_communicator_t* comm -); - -extern int mca_pml_csum_add_procs( - struct ompi_proc_t **procs, - size_t nprocs -); - -extern int mca_pml_csum_del_procs( - struct ompi_proc_t **procs, - size_t nprocs -); - -extern int mca_pml_csum_enable( bool enable ); - -extern int mca_pml_csum_progress(void); - -extern int mca_pml_csum_iprobe( int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - ompi_status_public_t* status ); - -extern int mca_pml_csum_probe( int dst, - int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); - -extern int mca_pml_csum_improbe( int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_csum_mprobe( int dst, - int tag, - struct ompi_communicator_t* comm, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_csum_isend_init( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_csum_isend( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_csum_send( void *buf, - size_t count, - ompi_datatype_t *datatype, - int dst, - int tag, - mca_pml_base_send_mode_t mode, - struct ompi_communicator_t* comm ); - -extern int mca_pml_csum_irecv_init( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_csum_irecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - struct ompi_request_t **request ); - -extern int mca_pml_csum_recv( void *buf, - size_t count, - ompi_datatype_t *datatype, - int src, - int tag, - struct ompi_communicator_t* comm, - ompi_status_public_t* status ); - -extern int mca_pml_csum_imrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request ); - -extern int mca_pml_csum_mrecv( void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status ); - -extern int mca_pml_csum_dump( struct ompi_communicator_t* comm, - int verbose ); - -extern int mca_pml_csum_start( size_t count, - ompi_request_t** requests ); - -extern int mca_pml_csum_ft_event( int state ); - -END_C_DECLS - -struct mca_pml_csum_pckt_pending_t { - ompi_free_list_item_t super; - ompi_proc_t* proc; - mca_pml_csum_hdr_t hdr; - struct mca_bml_base_btl_t *bml_btl; - uint8_t order; -}; -typedef struct mca_pml_csum_pckt_pending_t mca_pml_csum_pckt_pending_t; -OBJ_CLASS_DECLARATION(mca_pml_csum_pckt_pending_t); - -#define MCA_PML_CSUM_PCKT_PENDING_ALLOC(pckt,rc) \ -do { \ - ompi_free_list_item_t* item; \ - OMPI_FREE_LIST_WAIT(&mca_pml_csum.pending_pckts, item, rc); \ - pckt = (mca_pml_csum_pckt_pending_t*)item; \ -} while (0) - -#define MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt) \ -do { \ - /* return packet */ \ - OMPI_FREE_LIST_RETURN(&mca_pml_csum.pending_pckts, \ - (ompi_free_list_item_t*)pckt); \ -} while(0) - -#define MCA_PML_CSUM_ADD_FIN_TO_PENDING(P, D, B, O, S) \ - do { \ - 
mca_pml_csum_pckt_pending_t *_pckt; \ - int _rc; \ - \ - MCA_PML_CSUM_PCKT_PENDING_ALLOC(_pckt,_rc); \ - _pckt->hdr.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_FIN; \ - _pckt->hdr.hdr_fin.hdr_des = (D); \ - _pckt->hdr.hdr_fin.hdr_fail = (S); \ - _pckt->proc = (P); \ - _pckt->bml_btl = (B); \ - _pckt->order = (O); \ - OPAL_THREAD_LOCK(&mca_pml_csum.lock); \ - opal_list_append(&mca_pml_csum.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); \ - } while(0) - - -int mca_pml_csum_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, - ompi_ptr_t hdr_des, uint8_t order, uint32_t status); - -/* This function tries to resend FIN/ACK packets from pckt_pending queue. - * Packets are added to the queue when sending of FIN or ACK is failed due to - * resource unavailability. bml_btl passed to the function doesn't represents - * packet's destination, it represents BTL on which resource was freed, so only - * this BTL should be considered for resending packets */ -void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl); - -/* This function retries failed PUT/GET operations on frag. When RDMA operation - * cannot be accomplished for some reason, frag is put on the rdma_pending list. - * Later the operation is retried. The destination of RDMA operation is stored - * inside the frag structure */ -void mca_pml_csum_process_pending_rdma(void); - -#define MCA_PML_CSUM_PROGRESS_PENDING(bml_btl) \ - do { \ - if(opal_list_get_size(&mca_pml_csum.pckt_pending)) \ - mca_pml_csum_process_pending_packets(bml_btl); \ - if(opal_list_get_size(&mca_pml_csum.recv_pending)) \ - mca_pml_csum_recv_request_process_pending(); \ - if(opal_list_get_size(&mca_pml_csum.send_pending)) \ - mca_pml_csum_send_request_process_pending(bml_btl); \ - if(opal_list_get_size(&mca_pml_csum.rdma_pending)) \ - mca_pml_csum_process_pending_rdma(); \ - } while (0) - -/* - * Compute the total number of bytes on supplied descriptor - */ -static inline int mca_pml_csum_compute_segment_length (size_t seg_size, void *segments, size_t count, - size_t hdrlen) { - size_t i, length; - - for (i = 0, length = -hdrlen ; i < count ; ++i) { - mca_btl_base_segment_t *segment = - (mca_btl_base_segment_t *)((char *) segments + i * seg_size); - - length += segment->seg_len; - } - - return length; -} - -static inline int mca_pml_csum_compute_segment_length_base (mca_btl_base_segment_t *segments, - size_t count, size_t hdrlen) { - size_t i, length; - - for (i = 0, length = -hdrlen ; i < count ; ++i) { - length += segments[i].seg_len; - } - - return length; -} - -/* represent BTL chosen for sending request */ -struct mca_pml_csum_com_btl_t { - mca_bml_base_btl_t *bml_btl; - struct mca_mpool_base_registration_t* btl_reg; - size_t length; -}; -typedef struct mca_pml_csum_com_btl_t mca_pml_csum_com_btl_t; - -int mca_pml_csum_com_btl_comp(const void *v1, const void *v2); - -/* Calculate what percentage of a message to send through each BTL according to - * relative weight */ -static inline void -mca_pml_csum_calc_weighted_length( mca_pml_csum_com_btl_t *btls, int num_btls, size_t size, - double weight_total ) -{ - int i; - size_t length_left; - - /* shortcut for common case for only one BTL */ - if( OPAL_LIKELY(1 == num_btls) ) { - btls[0].length = size; - return; - } - - /* sort BTLs according of their weights so BTLs with smaller weight will - * not hijack all of the traffic */ - qsort( btls, num_btls, sizeof(mca_pml_csum_com_btl_t), - mca_pml_csum_com_btl_comp ); - - for(length_left = size, i = 0; i < num_btls; 
i++) { - mca_bml_base_btl_t* bml_btl = btls[i].bml_btl; - size_t length = 0; - if( OPAL_UNLIKELY(0 != length_left) ) { - length = (length_left > bml_btl->btl->btl_eager_limit)? - ((size_t)(size * (bml_btl->btl_weight / weight_total))) : - length_left; - - if(length > length_left) - length = length_left; - length_left -= length; - } - btls[i].length = length; - } - - /* account for rounding errors */ - btls[0].length += length_left; -} - -#endif diff --git a/ompi/mca/pml/csum/pml_csum_comm.c b/ompi/mca/pml/csum/pml_csum_comm.c deleted file mode 100644 index 9769becc2c..0000000000 --- a/ompi/mca/pml/csum/pml_csum_comm.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include - -#include "pml_csum.h" -#include "pml_csum_comm.h" - - - -static void mca_pml_csum_comm_proc_construct(mca_pml_csum_comm_proc_t* proc) -{ - proc->expected_sequence = 1; - proc->ompi_proc = NULL; - proc->send_sequence = 0; - OBJ_CONSTRUCT(&proc->frags_cant_match, opal_list_t); - OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t); - OBJ_CONSTRUCT(&proc->unexpected_frags, opal_list_t); -} - - -static void mca_pml_csum_comm_proc_destruct(mca_pml_csum_comm_proc_t* proc) -{ - OBJ_DESTRUCT(&proc->frags_cant_match); - OBJ_DESTRUCT(&proc->specific_receives); - OBJ_DESTRUCT(&proc->unexpected_frags); -} - - -static OBJ_CLASS_INSTANCE( - mca_pml_csum_comm_proc_t, - opal_object_t, - mca_pml_csum_comm_proc_construct, - mca_pml_csum_comm_proc_destruct); - - -static void mca_pml_csum_comm_construct(mca_pml_csum_comm_t* comm) -{ - OBJ_CONSTRUCT(&comm->wild_receives, opal_list_t); - OBJ_CONSTRUCT(&comm->matching_lock, opal_mutex_t); - comm->recv_sequence = 0; - comm->procs = NULL; - comm->num_procs = 0; -} - - -static void mca_pml_csum_comm_destruct(mca_pml_csum_comm_t* comm) -{ - size_t i; - for(i=0; inum_procs; i++) - OBJ_DESTRUCT((&comm->procs[i])); - if(NULL != comm->procs) - free(comm->procs); - OBJ_DESTRUCT(&comm->wild_receives); - OBJ_DESTRUCT(&comm->matching_lock); -} - - -OBJ_CLASS_INSTANCE( - mca_pml_csum_comm_t, - opal_object_t, - mca_pml_csum_comm_construct, - mca_pml_csum_comm_destruct); - - -int mca_pml_csum_comm_init_size(mca_pml_csum_comm_t* comm, size_t size) -{ - size_t i; - - /* send message sequence-number support - sender side */ - comm->procs = (mca_pml_csum_comm_proc_t*)malloc(sizeof(mca_pml_csum_comm_proc_t)*size); - if(NULL == comm->procs) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - for(i=0; iprocs+i, mca_pml_csum_comm_proc_t); - } - comm->num_procs = size; - return OMPI_SUCCESS; -} - - diff --git a/ompi/mca/pml/csum/pml_csum_comm.h b/ompi/mca/pml/csum/pml_csum_comm.h deleted file mode 100644 index 71a6eb3df5..0000000000 --- a/ompi/mca/pml/csum/pml_csum_comm.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_OB1_COMM_H -#define MCA_PML_OB1_COMM_H - -#include "opal/threads/mutex.h" -#include "opal/class/opal_list.h" -#include "ompi/proc/proc.h" -BEGIN_C_DECLS - - -struct mca_pml_csum_comm_proc_t { - opal_object_t super; - uint16_t expected_sequence; /**< send message sequence number - receiver side */ - struct ompi_proc_t* ompi_proc; -#if OPAL_ENABLE_MULTI_THREADS - volatile int32_t send_sequence; /**< send side sequence number */ -#else - int32_t send_sequence; /**< send side sequence number */ -#endif - opal_list_t frags_cant_match; /**< out-of-order fragment queues */ - opal_list_t specific_receives; /**< queues of unmatched specific receives */ - opal_list_t unexpected_frags; /**< unexpected fragment queues */ -}; -typedef struct mca_pml_csum_comm_proc_t mca_pml_csum_comm_proc_t; - - -/** - * Cached on ompi_communicator_t to hold queues/state - * used by the PML<->PTL interface for matching logic. - */ -struct mca_pml_comm_t { - opal_object_t super; -#if OPAL_ENABLE_MULTI_THREADS - volatile uint32_t recv_sequence; /**< recv request sequence number - receiver side */ -#else - uint32_t recv_sequence; /**< recv request sequence number - receiver side */ -#endif - opal_mutex_t matching_lock; /**< matching lock */ - opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */ - mca_pml_csum_comm_proc_t* procs; - size_t num_procs; -}; -typedef struct mca_pml_comm_t mca_pml_csum_comm_t; - -OBJ_CLASS_DECLARATION(mca_pml_csum_comm_t); - - -/** - * Initialize an instance of mca_pml_csum_comm_t based on the communicator size. - * - * @param comm Instance of mca_pml_csum_comm_t - * @param size Size of communicator - * @return OMPI_SUCCESS or error status on failure. - */ - -extern int mca_pml_csum_comm_init_size(mca_pml_csum_comm_t* comm, size_t size); - -END_C_DECLS -#endif - diff --git a/ompi/mca/pml/csum/pml_csum_component.c b/ompi/mca/pml/csum/pml_csum_component.c deleted file mode 100644 index d86b71992a..0000000000 --- a/ompi/mca/pml/csum/pml_csum_component.c +++ /dev/null @@ -1,254 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "opal/mca/event/event.h" -#include "mpi.h" -#include "ompi/runtime/params.h" -#include "ompi/mca/pml/pml.h" -#include "opal/mca/base/mca_base_param.h" -#include "ompi/mca/pml/base/pml_base_bsend.h" -#include "pml_csum.h" -#include "pml_csum_hdr.h" -#include "pml_csum_sendreq.h" -#include "pml_csum_recvreq.h" -#include "pml_csum_rdmafrag.h" -#include "pml_csum_recvfrag.h" -#include "ompi/mca/bml/base/base.h" -#include "pml_csum_component.h" -#include "ompi/mca/allocator/base/base.h" - -OBJ_CLASS_INSTANCE( mca_pml_csum_pckt_pending_t, - ompi_free_list_item_t, - NULL, - NULL ); - -static int mca_pml_csum_component_open(void); -static int mca_pml_csum_component_close(void); -static mca_pml_base_module_t* -mca_pml_csum_component_init( int* priority, bool enable_progress_threads, - bool enable_mpi_threads ); -static int mca_pml_csum_component_fini(void); -int mca_pml_csum_output = 0; - -mca_pml_base_component_2_0_0_t mca_pml_csum_component = { - - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - { - MCA_PML_BASE_VERSION_2_0_0, - - "csum", /* MCA component name */ - OMPI_MAJOR_VERSION, /* MCA component major version */ - OMPI_MINOR_VERSION, /* MCA component minor version */ - OMPI_RELEASE_VERSION, /* MCA component release version */ - mca_pml_csum_component_open, /* component open */ - mca_pml_csum_component_close /* component close */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - mca_pml_csum_component_init, /* component init */ - mca_pml_csum_component_fini /* component finalize */ - -}; - -void *mca_pml_csum_seg_alloc( struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration); - -void mca_pml_csum_seg_free( struct mca_mpool_base_module_t* mpool, - void* segment ); - -static inline int mca_pml_csum_param_register_int( - const char* param_name, - int default_value) -{ - int param_value = default_value; - - (void) mca_base_param_reg_int (&mca_pml_csum_component.pmlm_version, param_name, - NULL, false, false, default_value, ¶m_value); - - return param_value; -} - -static int mca_pml_csum_component_open(void) -{ - int value; - mca_allocator_base_component_t* allocator_component; - - value = mca_pml_csum_param_register_int("verbose", 0); - mca_pml_csum_output = opal_output_open(NULL); - opal_output_set_verbosity(mca_pml_csum_output, value); - - mca_pml_csum.free_list_num = - mca_pml_csum_param_register_int("free_list_num", 4); - mca_pml_csum.free_list_max = - mca_pml_csum_param_register_int("free_list_max", -1); - mca_pml_csum.free_list_inc = - mca_pml_csum_param_register_int("free_list_inc", 64); - mca_pml_csum.priority = - mca_pml_csum_param_register_int("priority", 0); - mca_pml_csum.send_pipeline_depth = - mca_pml_csum_param_register_int("send_pipeline_depth", 3); - mca_pml_csum.recv_pipeline_depth = - mca_pml_csum_param_register_int("recv_pipeline_depth", 4); - mca_pml_csum.rdma_put_retries_limit = - mca_pml_csum_param_register_int("rdma_put_retries_limit", 5); - mca_pml_csum.max_rdma_per_request = - mca_pml_csum_param_register_int("max_rdma_per_request", 4); - mca_pml_csum.max_send_per_range = - mca_pml_csum_param_register_int("max_send_per_range", 4); - - mca_pml_csum.unexpected_limit = - mca_pml_csum_param_register_int("unexpected_limit", 128); - - mca_base_param_reg_string(&mca_pml_csum_component.pmlm_version, - "allocator", 
- "Name of allocator component for unexpected messages", - false, false, - "bucket", - &mca_pml_csum.allocator_name); - - allocator_component = mca_allocator_component_lookup( mca_pml_csum.allocator_name ); - if(NULL == allocator_component) { - opal_output(0, "mca_pml_csum_component_open: can't find allocator: %s\n", mca_pml_csum.allocator_name); - return OMPI_ERROR; - } - - mca_pml_csum.allocator = allocator_component->allocator_init(true, - mca_pml_csum_seg_alloc, - mca_pml_csum_seg_free, NULL); - if(NULL == mca_pml_csum.allocator) { - opal_output(0, "mca_pml_csum_component_open: unable to initialize allocator\n"); - return OMPI_ERROR; - } - - mca_pml_csum.enabled = false; - return mca_bml_base_open(); -} - - -static int mca_pml_csum_component_close(void) -{ - int rc; - - if (OMPI_SUCCESS != (rc = mca_bml_base_close())) { - return rc; - } - if (NULL != mca_pml_csum.allocator_name) { - free(mca_pml_csum.allocator_name); - } - opal_output_close(mca_pml_csum_output); - - return OMPI_SUCCESS; -} - - -static mca_pml_base_module_t* -mca_pml_csum_component_init( int* priority, - bool enable_progress_threads, - bool enable_mpi_threads ) -{ - opal_output_verbose( 10, mca_pml_csum_output, - "in csum, my priority is %d\n", mca_pml_csum.priority); - - if((*priority) > mca_pml_csum.priority) { - *priority = mca_pml_csum.priority; - return NULL; - } - *priority = mca_pml_csum.priority; - - if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads, - enable_mpi_threads)) { - return NULL; - } - - /* Set this here (vs in component_open()) because - ompi_mpi_leave_pinned* may have been set after MCA params were - read (e.g., by the openib btl) */ - mca_pml_csum.leave_pinned = (1 == ompi_mpi_leave_pinned); - mca_pml_csum.leave_pinned_pipeline = (int) ompi_mpi_leave_pinned_pipeline; - - return &mca_pml_csum.super; -} - -int mca_pml_csum_component_fini(void) -{ - int rc; - - /* Shutdown BML */ - if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize())) - return rc; - - if(!mca_pml_csum.enabled) - return OMPI_SUCCESS; /* never selected.. return success.. 
*/ - mca_pml_csum.enabled = false; /* not anymore */ - - OBJ_DESTRUCT(&mca_pml_csum.rdma_pending); - OBJ_DESTRUCT(&mca_pml_csum.pckt_pending); - OBJ_DESTRUCT(&mca_pml_csum.recv_pending); - OBJ_DESTRUCT(&mca_pml_csum.send_pending); - OBJ_DESTRUCT(&mca_pml_csum.non_existing_communicator_pending); - OBJ_DESTRUCT(&mca_pml_csum.buffers); - OBJ_DESTRUCT(&mca_pml_csum.pending_pckts); - OBJ_DESTRUCT(&mca_pml_csum.recv_frags); - OBJ_DESTRUCT(&mca_pml_csum.rdma_frags); - OBJ_DESTRUCT(&mca_pml_csum.lock); - - if(OMPI_SUCCESS != (rc = mca_pml_csum.allocator->alc_finalize(mca_pml_csum.allocator))) { - return rc; - } - -#if 0 - if (mca_pml_base_send_requests.fl_num_allocated != - mca_pml_base_send_requests.super.opal_list_length) { - opal_output(0, "csum send requests: %d allocated %d returned\n", - mca_pml_base_send_requests.fl_num_allocated, - mca_pml_base_send_requests.super.opal_list_length); - } - if (mca_pml_base_recv_requests.fl_num_allocated != - mca_pml_base_recv_requests.super.opal_list_length) { - opal_output(0, "csum recv requests: %d allocated %d returned\n", - mca_pml_base_recv_requests.fl_num_allocated, - mca_pml_base_recv_requests.super.opal_list_length); - } -#endif - - return OMPI_SUCCESS; -} - -void *mca_pml_csum_seg_alloc( struct mca_mpool_base_module_t* mpool, - size_t* size, - mca_mpool_base_registration_t** registration) { - return malloc(*size); -} - -void mca_pml_csum_seg_free( struct mca_mpool_base_module_t* mpool, - void* segment ) { - free(segment); -} diff --git a/ompi/mca/pml/csum/pml_csum_component.h b/ompi/mca/pml/csum/pml_csum_component.h deleted file mode 100644 index 6b7af6ef1d..0000000000 --- a/ompi/mca/pml/csum/pml_csum_component.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_CSUM_COMPONENT_H -#define MCA_PML_CSUM_COMPONENT_H - -BEGIN_C_DECLS - -/* - * PML module functions. - */ -OMPI_MODULE_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_csum_component; - -END_C_DECLS - -#endif diff --git a/ompi/mca/pml/csum/pml_csum_endpoint.c b/ompi/mca/pml/csum/pml_csum_endpoint.c deleted file mode 100644 index 5bc347251a..0000000000 --- a/ompi/mca/pml/csum/pml_csum_endpoint.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include - -#include "pml_csum_endpoint.h" - - diff --git a/ompi/mca/pml/csum/pml_csum_endpoint.h b/ompi/mca/pml/csum/pml_csum_endpoint.h deleted file mode 100644 index 636d742786..0000000000 --- a/ompi/mca/pml/csum/pml_csum_endpoint.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_CSUM_ENDPOINT_H -#define MCA_PML_CSUM_ENDPOINT_H - - -BEGIN_C_DECLS - -END_C_DECLS -#endif - diff --git a/ompi/mca/pml/csum/pml_csum_hdr.h b/ompi/mca/pml/csum/pml_csum_hdr.h deleted file mode 100644 index 7803935ae1..0000000000 --- a/ompi/mca/pml/csum/pml_csum_hdr.h +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef MCA_PML_CSUM_HEADER_H -#define MCA_PML_CSUM_HEADER_H - -#include "ompi_config.h" -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif - -#include "opal/types.h" -#include "opal/util/arch.h" -#include "ompi/mca/btl/btl.h" -#include "ompi/proc/proc.h" - -#define MCA_PML_CSUM_HDR_TYPE_MATCH (MCA_BTL_TAG_PML + 1) -#define MCA_PML_CSUM_HDR_TYPE_RNDV (MCA_BTL_TAG_PML + 2) -#define MCA_PML_CSUM_HDR_TYPE_RGET (MCA_BTL_TAG_PML + 3) -#define MCA_PML_CSUM_HDR_TYPE_ACK (MCA_BTL_TAG_PML + 4) -#define MCA_PML_CSUM_HDR_TYPE_NACK (MCA_BTL_TAG_PML + 5) -#define MCA_PML_CSUM_HDR_TYPE_FRAG (MCA_BTL_TAG_PML + 6) -#define MCA_PML_CSUM_HDR_TYPE_GET (MCA_BTL_TAG_PML + 7) -#define MCA_PML_CSUM_HDR_TYPE_PUT (MCA_BTL_TAG_PML + 8) -#define MCA_PML_CSUM_HDR_TYPE_FIN (MCA_BTL_TAG_PML + 9) - -#define MCA_PML_CSUM_HDR_FLAGS_ACK 1 /* is an ack required */ -#define MCA_PML_CSUM_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */ -#define MCA_PML_CSUM_HDR_FLAGS_PIN 4 /* is user buffer pinned */ -#define MCA_PML_CSUM_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */ -#define MCA_PML_CSUM_HDR_FLAGS_NORDMA 16 /* rest will be send by copy-in-out */ - -/** - * Common hdr attributes - must be first element in each hdr type - */ -struct mca_pml_csum_common_hdr_t { - uint8_t hdr_type; /**< type of envelope */ - uint8_t hdr_flags; /**< flags indicating how fragment should be processed */ - uint16_t hdr_csum; /**< checksum over header */ -}; -typedef struct mca_pml_csum_common_hdr_t mca_pml_csum_common_hdr_t; - -#define MCA_PML_CSUM_COMMON_HDR_NTOH(h) (h).hdr_csum = ntohs((h).hdr_csum); -#define MCA_PML_CSUM_COMMON_HDR_HTON(h) (h).hdr_csum = htons((h).hdr_csum); - -/** - * Header definition for the first fragment, contains the - * attributes required to match the corresponding posted receive. - */ -struct mca_pml_csum_match_hdr_t { - mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ - uint16_t hdr_ctx; /**< communicator index */ - uint16_t hdr_seq; /**< message sequence number */ - int32_t hdr_src; /**< source rank */ - int32_t hdr_tag; /**< user tag */ - uint32_t hdr_csum; /**< checksum over data */ -}; -#define OMPI_PML_CSUM_MATCH_HDR_LEN 20 - -typedef struct mca_pml_csum_match_hdr_t mca_pml_csum_match_hdr_t; - -#define MCA_PML_CSUM_MATCH_HDR_NTOH(h) \ -do { \ - MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_ctx = ntohs((h).hdr_ctx); \ - (h).hdr_src = ntohl((h).hdr_src); \ - (h).hdr_tag = ntohl((h).hdr_tag); \ - (h).hdr_seq = ntohs((h).hdr_seq); \ - (h).hdr_csum = ntohl((h).hdr_csum); \ -} while (0) - -#define MCA_PML_CSUM_MATCH_HDR_HTON(h) \ -do { \ - MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ - (h).hdr_ctx = htons((h).hdr_ctx); \ - (h).hdr_src = htonl((h).hdr_src); \ - (h).hdr_tag = htonl((h).hdr_tag); \ - (h).hdr_seq = htons((h).hdr_seq); \ - (h).hdr_csum = htonl((h).hdr_csum); \ -} while (0) - -/** - * Header definition for the first fragment when an acknowledgment - * is required. This could be the first fragment of a large message - * or a short message that requires an ack (synchronous). 
- */ -struct mca_pml_csum_rendezvous_hdr_t { - mca_pml_csum_match_hdr_t hdr_match; - uint64_t hdr_msg_length; /**< message length */ - ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */ -}; -typedef struct mca_pml_csum_rendezvous_hdr_t mca_pml_csum_rendezvous_hdr_t; - -/* Note that hdr_src_req is not put in network byte order because it - is never processed by the receiver, other than being copied into - the ack header */ -#define MCA_PML_CSUM_RNDV_HDR_NTOH(h) \ - do { \ - MCA_PML_CSUM_MATCH_HDR_NTOH((h).hdr_match); \ - (h).hdr_msg_length = ntoh64((h).hdr_msg_length); \ - } while (0) - -#define MCA_PML_CSUM_RNDV_HDR_HTON(h) \ - do { \ - MCA_PML_CSUM_MATCH_HDR_HTON((h).hdr_match); \ - (h).hdr_msg_length = hton64((h).hdr_msg_length); \ - } while (0) - -/** - * Header definition for a combined rdma rendezvous/get - */ -struct mca_pml_csum_rget_hdr_t { - mca_pml_csum_rendezvous_hdr_t hdr_rndv; - uint32_t hdr_seg_cnt; /**< number of segments for rdma */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[4]; -#endif - ompi_ptr_t hdr_des; /**< source descriptor */ -}; -typedef struct mca_pml_csum_rget_hdr_t mca_pml_csum_rget_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_CSUM_RGET_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - (h).hdr_padding[2] = 0; \ - (h).hdr_padding[3] = 0; \ -} while(0) -#else -#define MCA_PML_CSUM_RGET_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -#define MCA_PML_CSUM_RGET_HDR_NTOH(h) \ - do { \ - MCA_PML_CSUM_RNDV_HDR_NTOH((h).hdr_rndv); \ - (h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \ - } while (0) - -#define MCA_PML_CSUM_RGET_HDR_HTON(h) \ - do { \ - MCA_PML_CSUM_RNDV_HDR_HTON((h).hdr_rndv); \ - MCA_PML_CSUM_RGET_HDR_FILL(h); \ - (h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \ - } while (0) - -/** - * Header for subsequent fragments. - */ -struct mca_pml_csum_frag_hdr_t { - mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ - uint32_t hdr_csum; - uint64_t hdr_frag_offset; /**< offset into message */ - ompi_ptr_t hdr_src_req; /**< pointer to source request */ - ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */ -}; -typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t; - -#define MCA_PML_CSUM_FRAG_HDR_NTOH(h) \ - do { \ - MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_csum = ntohl((h).hdr_csum); \ - (h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \ - } while (0) - -#define MCA_PML_CSUM_FRAG_HDR_HTON(h) \ - do { \ - MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ - (h).hdr_csum = htonl((h).hdr_csum); \ - (h).hdr_frag_offset = hton64((h).hdr_frag_offset); \ - } while (0) - -/** - * Header used to acknowledgment outstanding fragment(s). 
- */ - -struct mca_pml_csum_ack_hdr_t { - mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - uint8_t hdr_padding[4]; -#endif - ompi_ptr_t hdr_src_req; /**< source request */ - ompi_ptr_t hdr_dst_req; /**< matched receive request */ - uint64_t hdr_send_offset; /**< starting point of copy in/out */ -}; -typedef struct mca_pml_csum_ack_hdr_t mca_pml_csum_ack_hdr_t; - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG -#define MCA_PML_CSUM_ACK_HDR_FILL(h) \ -do { \ - (h).hdr_padding[0] = 0; \ - (h).hdr_padding[1] = 0; \ - (h).hdr_padding[2] = 0; \ - (h).hdr_padding[3] = 0; \ -} while (0) -#else -#define MCA_PML_CSUM_ACK_HDR_FILL(h) -#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */ - -/* Note that the request headers are not put in NBO because the - src_req is already in receiver's byte order and the dst_req is not - used by the receiver for anything other than backpointers in return - headers */ -#define MCA_PML_CSUM_ACK_HDR_NTOH(h) \ - do { \ - MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_send_offset = ntoh64((h).hdr_send_offset); \ - } while (0) - -#define MCA_PML_CSUM_ACK_HDR_HTON(h) \ - do { \ - MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ - MCA_PML_CSUM_ACK_HDR_FILL(h); \ - (h).hdr_send_offset = hton64((h).hdr_send_offset); \ - } while (0) - -/** - * Header used to initiate an RDMA operation. - */ - -struct mca_pml_csum_rdma_hdr_t { - mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ - uint32_t hdr_seg_cnt; /**< number of segments for rdma */ - ompi_ptr_t hdr_req; /**< destination request */ - ompi_ptr_t hdr_des; /**< source descriptor */ - uint64_t hdr_rdma_offset; /**< current offset into user buffer */ - mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */ -}; -typedef struct mca_pml_csum_rdma_hdr_t mca_pml_csum_rdma_hdr_t; - -#define MCA_PML_CSUM_RDMA_HDR_NTOH(h) \ - do { \ - MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \ - (h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \ - } while (0) - -#define MCA_PML_CSUM_RDMA_HDR_HTON(h) \ - do { \ - MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ - (h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \ - (h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \ - } while (0) - -/** - * Header used to complete an RDMA operation. - */ - -struct mca_pml_csum_fin_hdr_t { - mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */ - uint32_t hdr_csum; - ompi_ptr_t hdr_des; /**< completed descriptor */ - uint32_t hdr_fail; /**< RDMA operation failed */ -}; -typedef struct mca_pml_csum_fin_hdr_t mca_pml_csum_fin_hdr_t; - -#define MCA_PML_CSUM_FIN_HDR_NTOH(h) \ - do { \ - MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \ - (h).hdr_csum = ntohl((h).hdr_csum); \ - (h).hdr_fail = ntohl((h).hdr_fail); \ - } while (0) - -#define MCA_PML_CSUM_FIN_HDR_HTON(h) \ - do { \ - MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \ - (h).hdr_csum = htonl((h).hdr_csum); \ - (h).hdr_fail = htonl((h).hdr_fail); \ - } while (0) - -/** - * Union of defined hdr types. 
- */ -union mca_pml_csum_hdr_t { - mca_pml_csum_common_hdr_t hdr_common; - mca_pml_csum_match_hdr_t hdr_match; - mca_pml_csum_rendezvous_hdr_t hdr_rndv; - mca_pml_csum_rget_hdr_t hdr_rget; - mca_pml_csum_frag_hdr_t hdr_frag; - mca_pml_csum_ack_hdr_t hdr_ack; - mca_pml_csum_rdma_hdr_t hdr_rdma; - mca_pml_csum_fin_hdr_t hdr_fin; -}; -typedef union mca_pml_csum_hdr_t mca_pml_csum_hdr_t; - -#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT -static inline __opal_attribute_always_inline__ void -csum_hdr_ntoh(mca_pml_csum_hdr_t *hdr, const uint8_t hdr_type) -{ - if(!(hdr->hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NBO)) - return; - - switch(hdr_type) { - case MCA_PML_CSUM_HDR_TYPE_MATCH: - MCA_PML_CSUM_MATCH_HDR_NTOH(hdr->hdr_match); - break; - case MCA_PML_CSUM_HDR_TYPE_RNDV: - MCA_PML_CSUM_RNDV_HDR_NTOH(hdr->hdr_rndv); - break; - case MCA_PML_CSUM_HDR_TYPE_RGET: - MCA_PML_CSUM_RGET_HDR_NTOH(hdr->hdr_rget); - break; - case MCA_PML_CSUM_HDR_TYPE_ACK: - MCA_PML_CSUM_ACK_HDR_NTOH(hdr->hdr_ack); - break; - case MCA_PML_CSUM_HDR_TYPE_FRAG: - MCA_PML_CSUM_FRAG_HDR_NTOH(hdr->hdr_frag); - break; - case MCA_PML_CSUM_HDR_TYPE_PUT: - MCA_PML_CSUM_RDMA_HDR_NTOH(hdr->hdr_rdma); - break; - case MCA_PML_CSUM_HDR_TYPE_FIN: - MCA_PML_CSUM_FIN_HDR_NTOH(hdr->hdr_fin); - break; - default: - assert(0); - break; - } -} -#else -#define csum_hdr_ntoh(h, t) do{}while(0) -#endif - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT -#define csum_hdr_hton(h, t, p) \ - csum_hdr_hton_intr((mca_pml_csum_hdr_t*)h, t, p) -static inline __opal_attribute_always_inline__ void -csum_hdr_hton_intr(mca_pml_csum_hdr_t *hdr, const uint8_t hdr_type, - const ompi_proc_t *proc) -{ -#ifdef WORDS_BIGENDIAN - hdr->hdr_common.hdr_flags |= MCA_PML_CSUM_HDR_FLAGS_NBO; -#else - - if(!(proc->proc_arch & OPAL_ARCH_ISBIGENDIAN)) - return; - - hdr->hdr_common.hdr_flags |= MCA_PML_CSUM_HDR_FLAGS_NBO; - switch(hdr_type) { - case MCA_PML_CSUM_HDR_TYPE_MATCH: - MCA_PML_CSUM_MATCH_HDR_HTON(hdr->hdr_match); - break; - case MCA_PML_CSUM_HDR_TYPE_RNDV: - MCA_PML_CSUM_RNDV_HDR_HTON(hdr->hdr_rndv); - break; - case MCA_PML_CSUM_HDR_TYPE_RGET: - MCA_PML_CSUM_RGET_HDR_HTON(hdr->hdr_rget); - break; - case MCA_PML_CSUM_HDR_TYPE_ACK: - MCA_PML_CSUM_ACK_HDR_HTON(hdr->hdr_ack); - break; - case MCA_PML_CSUM_HDR_TYPE_FRAG: - MCA_PML_CSUM_FRAG_HDR_HTON(hdr->hdr_frag); - break; - case MCA_PML_CSUM_HDR_TYPE_PUT: - MCA_PML_CSUM_RDMA_HDR_HTON(hdr->hdr_rdma); - break; - case MCA_PML_CSUM_HDR_TYPE_FIN: - MCA_PML_CSUM_FIN_HDR_HTON(hdr->hdr_fin); - break; - default: - assert(0); - break; - } -#endif -} -#else -#define csum_hdr_hton(h, t, p) do{}while(0) -#endif -#endif diff --git a/ompi/mca/pml/csum/pml_csum_iprobe.c b/ompi/mca/pml/csum/pml_csum_iprobe.c deleted file mode 100644 index 39c5ffdc61..0000000000 --- a/ompi/mca/pml/csum/pml_csum_iprobe.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "pml_csum_recvreq.h" - - -int mca_pml_csum_iprobe(int src, - int tag, - struct ompi_communicator_t *comm, - int *matched, ompi_status_public_t * status) -{ - int rc = OMPI_SUCCESS; - mca_pml_csum_recv_request_t recvreq; - - OBJ_CONSTRUCT( &recvreq, mca_pml_csum_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE; - - MCA_PML_CSUM_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true); - MCA_PML_CSUM_RECV_REQUEST_START(&recvreq); - - if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) { - if( NULL != status ) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - *matched = 1; - } else { - *matched = 0; - opal_progress(); - } - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return rc; -} - - -int mca_pml_csum_probe(int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - mca_pml_csum_recv_request_t recvreq; - - OBJ_CONSTRUCT( &recvreq, mca_pml_csum_recv_request_t ); - recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML; - recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE; - - MCA_PML_CSUM_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true); - MCA_PML_CSUM_RECV_REQUEST_START(&recvreq); - - ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi); - - if (NULL != status) { - *status = recvreq.req_recv.req_base.req_ompi.req_status; - } - MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv ); - return OMPI_SUCCESS; -} - - -int -mca_pml_csum_improbe(int dst, - int tag, - struct ompi_communicator_t* comm, - int *matched, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int -mca_pml_csum_mprobe(int dst, - int tag, - struct ompi_communicator_t* comm, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} diff --git a/ompi/mca/pml/csum/pml_csum_irecv.c b/ompi/mca/pml/csum/pml_csum_irecv.c deleted file mode 100644 index 97ff62a952..0000000000 --- a/ompi/mca/pml/csum/pml_csum_irecv.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/request/request.h" -#include "pml_csum_recvreq.h" -#include "ompi/peruse/peruse-internal.h" - -int mca_pml_csum_irecv_init(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - int rc; - mca_pml_csum_recv_request_t *recvreq; - MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, true); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - -int mca_pml_csum_irecv(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - struct ompi_request_t **request) -{ - int rc; - - mca_pml_csum_recv_request_t *recvreq; - MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - MCA_PML_CSUM_RECV_REQUEST_START(recvreq); - *request = (ompi_request_t *) recvreq; - return OMPI_SUCCESS; -} - - -int mca_pml_csum_recv(void *addr, - size_t count, - ompi_datatype_t * datatype, - int src, - int tag, - struct ompi_communicator_t *comm, - ompi_status_public_t * status) -{ - int rc; - mca_pml_csum_recv_request_t *recvreq; - MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc); - if (NULL == recvreq) - return rc; - - MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq, - addr, - count, datatype, src, tag, comm, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &((recvreq)->req_recv.req_base), - PERUSE_RECV); - - MCA_PML_CSUM_RECV_REQUEST_START(recvreq); - ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi); - - if (NULL != status) { /* return status */ - *status = recvreq->req_recv.req_base.req_ompi.req_status; - } - rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR; - ompi_request_free( (ompi_request_t**)&recvreq ); - return rc; -} - - -int -mca_pml_csum_imrecv(void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - struct ompi_request_t **request) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - - -int -mca_pml_csum_mrecv(void *buf, - size_t count, - ompi_datatype_t *datatype, - struct ompi_message_t **message, - ompi_status_public_t* status) -{ - return OMPI_ERR_NOT_SUPPORTED; -} diff --git a/ompi/mca/pml/csum/pml_csum_isend.c b/ompi/mca/pml/csum/pml_csum_isend.c deleted file mode 100644 index 341263cd79..0000000000 --- a/ompi/mca/pml/csum/pml_csum_isend.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_csum.h" -#include "pml_csum_sendreq.h" -#include "pml_csum_recvreq.h" -#include "ompi/peruse/peruse-internal.h" - -int mca_pml_csum_isend_init(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - int rc; - - mca_pml_csum_send_request_t *sendreq = NULL; - MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - - MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, true); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - *request = (ompi_request_t *) sendreq; - return OMPI_SUCCESS; -} - - -int mca_pml_csum_isend(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm, - ompi_request_t ** request) -{ - int rc; - mca_pml_csum_send_request_t *sendreq = NULL; - - MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - - MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc); - *request = (ompi_request_t *) sendreq; - return rc; -} - - -int mca_pml_csum_send(void *buf, - size_t count, - ompi_datatype_t * datatype, - int dst, - int tag, - mca_pml_base_send_mode_t sendmode, - ompi_communicator_t * comm) -{ - int rc; - mca_pml_csum_send_request_t *sendreq; - - MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc); - if (rc != OMPI_SUCCESS) - return rc; - - MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq, - buf, - count, - datatype, - dst, tag, - comm, sendmode, false); - - PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE, - &(sendreq)->req_send.req_base, - PERUSE_SEND); - - MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc); - if (rc != OMPI_SUCCESS) { - MCA_PML_CSUM_SEND_REQUEST_RETURN( sendreq ); - return rc; - } - - ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi); - - rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR; - ompi_request_free( (ompi_request_t**)&sendreq ); - return rc; -} diff --git a/ompi/mca/pml/csum/pml_csum_progress.c b/ompi/mca/pml/csum/pml_csum_progress.c deleted file mode 100644 index 896e1a4e3f..0000000000 --- a/ompi/mca/pml/csum/pml_csum_progress.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_csum.h" -#include "pml_csum_sendreq.h" -#include "ompi/mca/bml/base/base.h" - -int mca_pml_csum_progress(void) -{ - int i, queue_length = opal_list_get_size(&mca_pml_csum.send_pending); - int j, completed_requests = 0; - bool send_succedded; - - if( OPAL_LIKELY(0 == queue_length) ) - return 0; - - for( i = 0; i < queue_length; i++ ) { - mca_pml_csum_send_pending_t pending_type = MCA_PML_CSUM_SEND_PENDING_NONE; - mca_pml_csum_send_request_t* sendreq; - mca_bml_base_endpoint_t* endpoint; - - sendreq = get_request_from_send_pending(&pending_type); - if(OPAL_UNLIKELY(NULL == sendreq)) - break; - - switch(pending_type) { - case MCA_PML_CSUM_SEND_PENDING_NONE: - assert(0); - return 0; - case MCA_PML_CSUM_SEND_PENDING_SCHEDULE: - if( mca_pml_csum_send_request_schedule_exclusive(sendreq) == - OMPI_ERR_OUT_OF_RESOURCE ) { - return 0; - } - completed_requests++; - break; - case MCA_PML_CSUM_SEND_PENDING_START: - endpoint = sendreq->req_endpoint; - send_succedded = false; - for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) { - mca_bml_base_btl_t* bml_btl; - int rc; - - /* select a btl */ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - rc = mca_pml_csum_send_request_start_btl(sendreq, bml_btl); - if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { - send_succedded = true; - completed_requests++; - break; - } - } - if( false == send_succedded ) { - add_request_to_send_pending(sendreq, MCA_PML_CSUM_SEND_PENDING_START, true); - } - } - } - return completed_requests; -} - diff --git a/ompi/mca/pml/csum/pml_csum_rdma.c b/ompi/mca/pml/csum/pml_csum_rdma.c deleted file mode 100644 index 6b29611b61..0000000000 --- a/ompi/mca/pml/csum/pml_csum_rdma.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ - -#include "ompi_config.h" -#include "ompi/constants.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" -#include "ompi/mca/mpool/mpool.h" -#include "pml_csum.h" -#include "pml_csum_rdma.h" - -/* Use this registration if no registration needed for a BTL instead of NULL. - * This will help other code to distinguish case when memory is not registered - * from case when registration is not needed */ -static mca_mpool_base_registration_t pml_csum_dummy_reg; - -/* - * Check to see if memory is registered or can be registered. Build a - * set of registrations on the request. 
- */ - -size_t mca_pml_csum_rdma_btls( - mca_bml_base_endpoint_t* bml_endpoint, - unsigned char* base, - size_t size, - mca_pml_csum_com_btl_t* rdma_btls) -{ - int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - double weight_total = 0; - int num_btls_used = 0, n; - - /* shortcut when there are no rdma capable btls */ - if(num_btls == 0) { - return 0; - } - - /* check to see if memory is registered */ - for(n = 0; n < num_btls && num_btls_used < mca_pml_csum.max_rdma_per_request; - n++) { - mca_bml_base_btl_t* bml_btl = - mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, - (bml_endpoint->btl_rdma_index + n) % num_btls); - mca_mpool_base_registration_t* reg = &pml_csum_dummy_reg; - mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool; - - if( NULL != btl_mpool ) { - if(!mca_pml_csum.leave_pinned) { - /* look through existing registrations */ - btl_mpool->mpool_find(btl_mpool, base, size, ®); - } else { - /* register the memory */ - btl_mpool->mpool_register(btl_mpool, base, size, 0, ®); - } - - if(NULL == reg) - continue; - } - - rdma_btls[num_btls_used].bml_btl = bml_btl; - rdma_btls[num_btls_used].btl_reg = reg; - weight_total += bml_btl->btl_weight; - num_btls_used++; - } - - /* if we don't use leave_pinned and all BTLs that already have this memory - * registered amount to less then half of available bandwidth - fall back to - * pipeline protocol */ - if(0 == num_btls_used || (!mca_pml_csum.leave_pinned && weight_total < 0.5)) - return 0; - - mca_pml_csum_calc_weighted_length(rdma_btls, num_btls_used, size, - weight_total); - - bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % num_btls; - return num_btls_used; -} - -size_t mca_pml_csum_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint, - size_t size, - mca_pml_csum_com_btl_t* rdma_btls ) -{ - int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - double weight_total = 0; - - for(i = 0; i < num_btls && i < mca_pml_csum.max_rdma_per_request; i++) { - rdma_btls[i].bml_btl = - mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma); - if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool) - rdma_btls[i].btl_reg = NULL; - else - rdma_btls[i].btl_reg = &pml_csum_dummy_reg; - - weight_total += rdma_btls[i].bml_btl->btl_weight; - } - - mca_pml_csum_calc_weighted_length(rdma_btls, i, size, weight_total); - - return i; -} diff --git a/ompi/mca/pml/csum/pml_csum_rdma.h b/ompi/mca/pml/csum/pml_csum_rdma.h deleted file mode 100644 index 109ef181ad..0000000000 --- a/ompi/mca/pml/csum/pml_csum_rdma.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_CSUM_RDMA_H -#define MCA_PML_CSUM_RDMA_H - -struct mca_bml_base_endpoint_t; - -/* - * Of the set of available btls that support RDMA, - * find those that already have registrations - or - * register if required (for leave_pinned option) - */ -size_t mca_pml_csum_rdma_btls(struct mca_bml_base_endpoint_t* endpoint, - unsigned char* base, size_t size, struct mca_pml_csum_com_btl_t* btls); - -/* Choose RDMA BTLs to use for sending of a request by pipeline protocol. - * Calculate number of bytes to send through each BTL according to available - * bandwidth */ -size_t mca_pml_csum_rdma_pipeline_btls(struct mca_bml_base_endpoint_t* endpoint, - size_t size, mca_pml_csum_com_btl_t* rdma_btls); -#endif - diff --git a/ompi/mca/pml/csum/pml_csum_rdmafrag.c b/ompi/mca/pml/csum/pml_csum_rdmafrag.c deleted file mode 100644 index 438efb29e9..0000000000 --- a/ompi/mca/pml/csum/pml_csum_rdmafrag.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_csum.h" -#include "pml_csum_rdmafrag.h" - - -OBJ_CLASS_INSTANCE( - mca_pml_csum_rdma_frag_t, - ompi_free_list_item_t, - NULL, - NULL); diff --git a/ompi/mca/pml/csum/pml_csum_rdmafrag.h b/ompi/mca/pml/csum/pml_csum_rdmafrag.h deleted file mode 100644 index 3d14483c92..0000000000 --- a/ompi/mca/pml/csum/pml_csum_rdmafrag.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_CSUM_RDMAFRAG_H -#define MCA_PML_CSUM_RDMAFRAG_H - -#include "ompi/mca/btl/btl.h" -#include "pml_csum_hdr.h" - -BEGIN_C_DECLS - -typedef enum { - MCA_PML_CSUM_RDMA_PUT, - MCA_PML_CSUM_RDMA_GET -} mca_pml_csum_rdma_state_t; - -struct mca_pml_csum_rdma_frag_t { - ompi_free_list_item_t super; - mca_bml_base_btl_t* rdma_bml; - mca_pml_csum_hdr_t rdma_hdr; - mca_pml_csum_rdma_state_t rdma_state; - size_t rdma_length; - uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS]; - void *rdma_req; - struct mca_bml_base_endpoint_t* rdma_ep; - opal_convertor_t convertor; - mca_mpool_base_registration_t* reg; - uint32_t retries; -}; -typedef struct mca_pml_csum_rdma_frag_t mca_pml_csum_rdma_frag_t; - -OBJ_CLASS_DECLARATION(mca_pml_csum_rdma_frag_t); - - -#define MCA_PML_CSUM_RDMA_FRAG_ALLOC(frag,rc) \ -do { \ - ompi_free_list_item_t* item; \ - OMPI_FREE_LIST_WAIT(&mca_pml_csum.rdma_frags, item, rc); \ - frag = (mca_pml_csum_rdma_frag_t*)item; \ -} while(0) - -#define MCA_PML_CSUM_RDMA_FRAG_RETURN(frag) \ -do { \ - /* return fragment */ \ - OMPI_FREE_LIST_RETURN(&mca_pml_csum.rdma_frags, \ - (ompi_free_list_item_t*)frag); \ -} while(0) - - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/csum/pml_csum_recvfrag.c b/ompi/mca/pml/csum/pml_csum_recvfrag.c deleted file mode 100644 index e976ce2529..0000000000 --- a/ompi/mca/pml/csum/pml_csum_recvfrag.c +++ /dev/null @@ -1,841 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2006-2008 University of Houston. All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - */ - -#include "ompi_config.h" - -#include "opal/class/opal_list.h" -#include "opal/util/crc.h" -#include "opal/threads/mutex.h" -#include "opal/prefetch.h" -#include "opal/util/output.h" - -#include "ompi/constants.h" -#include "ompi/communicator/communicator.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/pml/base/base.h" -#include "ompi/peruse/peruse-internal.h" -#include "ompi/memchecker.h" - -#include "pml_csum.h" -#include "pml_csum_comm.h" -#include "pml_csum_recvfrag.h" -#include "pml_csum_recvreq.h" -#include "pml_csum_sendreq.h" -#include "pml_csum_hdr.h" - -OBJ_CLASS_INSTANCE( mca_pml_csum_buffer_t, - ompi_free_list_item_t, - NULL, - NULL ); - -OBJ_CLASS_INSTANCE( mca_pml_csum_recv_frag_t, - opal_list_item_t, - NULL, - NULL ); - -/** - * Static functions. 
- */ - -/** - * Dump data elements that caused a checksum violation - */ -static void dump_csum_error_data(mca_btl_base_segment_t* segments, size_t num_segments) -{ - size_t i, j; - uint8_t *data; - - printf("CHECKSUM ERROR DATA\n"); - for (i = 0; i < num_segments; ++i) { - printf("Segment %lu", (unsigned long)i); - data = (uint8_t*)segments[i].seg_addr.pval; - for (j=0; j < segments[i].seg_len; j++) { - if (0 == (j % 40)) { - printf("\n"); - } - printf("%02x ", data[j]); - }; - } - printf("\nEND CHECKSUM ERROR DATA\n\n"); -} - -/** - * Append a unexpected descriptor to a queue. This function will allocate and - * initialize the fragment (if necessary) and then will add it to the specified - * queue. The allocated fragment is not returned to the caller. - */ -static void -append_frag_to_list(opal_list_t *queue, mca_btl_base_module_t *btl, - mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, mca_pml_csum_recv_frag_t* frag) -{ - int rc; - - if(NULL == frag) { - MCA_PML_CSUM_RECV_FRAG_ALLOC(frag, rc); - MCA_PML_CSUM_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl); - } - opal_list_append(queue, (opal_list_item_t*)frag); -} - -/** - * Match incoming recv_frags against posted receives. - * Supports out of order delivery. - * - * @param frag_header (IN) Header of received recv_frag. - * @param frag_desc (IN) Received recv_frag descriptor. - * @param match_made (OUT) Flag indicating wether a match was made. - * @param additional_matches (OUT) List of additional matches - * @return OMPI_SUCCESS or error status on failure. - */ -static int mca_pml_csum_recv_frag_match( mca_btl_base_module_t *btl, - mca_pml_csum_match_hdr_t *hdr, - mca_btl_base_segment_t* segments, - size_t num_segments, - int type); - -static mca_pml_csum_recv_request_t* -match_one(mca_btl_base_module_t *btl, - mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, ompi_communicator_t *comm_ptr, - mca_pml_csum_comm_proc_t *proc, - mca_pml_csum_recv_frag_t* frag); - -void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_dst; - mca_pml_csum_match_hdr_t* hdr = (mca_pml_csum_match_hdr_t*)segments->seg_addr.pval; - ompi_communicator_t *comm_ptr; - mca_pml_csum_recv_request_t *match = NULL; - mca_pml_csum_comm_t *comm; - mca_pml_csum_comm_proc_t *proc; - size_t num_segments = des->des_dst_cnt; - size_t bytes_received = 0; - uint16_t csum_received, csum=0; - uint32_t csum_data; - - assert(num_segments <= MCA_BTL_DES_MAX_SEGMENTS); - - if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_CSUM_MATCH_HDR_LEN) ) { - return; - } - csum_hdr_ntoh(((mca_pml_csum_hdr_t*) hdr), MCA_PML_CSUM_HDR_TYPE_MATCH); - - csum_received = hdr->hdr_common.hdr_csum; - hdr->hdr_common.hdr_csum = 0; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO; -#endif - csum = opal_csum16(hdr, OMPI_PML_CSUM_MATCH_HDR_LEN); - hdr->hdr_common.hdr_csum = csum_received; - - OPAL_OUTPUT_VERBOSE((5, mca_pml_base_output, - "%s:%s:%d common_hdr: %02x:%02x:%04x match_hdr: %04x:%04x:%08x:%08x:%08x", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, - hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, hdr->hdr_common.hdr_csum, - hdr->hdr_ctx, hdr->hdr_seq, hdr->hdr_src, hdr->hdr_tag, hdr->hdr_csum)); - - if (csum_received != csum) { - opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x != computed 
csum:0x%04x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); - dump_csum_error_data(segments, 1); - ompi_rte_abort(-1,NULL); - } - - /* communicator pointer */ - comm_ptr = ompi_comm_lookup(hdr->hdr_ctx); - if(OPAL_UNLIKELY(NULL == comm_ptr)) { - /* This is a special case. A message for a not yet existing - * communicator can happens. Instead of doing a matching we - * will temporarily add it the a pending queue in the PML. - * Later on, when the communicator is completely instantiated, - * this pending queue will be searched and all matching fragments - * moved to the right communicator. - */ - append_frag_to_list( &mca_pml_csum.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); - return; - } - comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm; - - /* source sequence number */ - proc = &comm->procs[hdr->hdr_src]; - - /* We generate the MSG_ARRIVED event as soon as the PML is aware - * of a matching fragment arrival. Independing if it is received - * on the correct order or not. This will allow the tools to - * figure out if the messages are not received in the correct - * order (if multiple network interfaces). - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing - * a frag from the same message a match is made only once. - * Also, this prevents other posted receives (for a pair of - * end points) from being processed, and potentially "loosing" - * the fragment. - */ - OPAL_THREAD_LOCK(&comm->matching_lock); - - /* get sequence number of next message that can be processed */ - if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) || - (opal_list_get_size(&proc->frags_cant_match) > 0 ))) { - goto slow_path; - } - - /* This is the sequence number we were expecting, so we can try - * matching it to already posted receives. - */ - - /* We're now expecting the next sequence number. */ - proc->expected_sequence++; - - /* We generate the SEARCH_POSTED_QUEUE only when the message is - * received in the correct sequence. Otherwise, we delay the event - * generation until we reach the correct sequence number. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, NULL); - - /* The match is over. We generate the SEARCH_POSTED_Q_END here, - * before going into the mca_pml_csum_check_cantmatch_for_match so - * we can make a difference for the searching time for all - * messages. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* release matching lock before processing fragment */ - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - if(OPAL_LIKELY(match)) { - bytes_received = segments->seg_len - OMPI_PML_CSUM_MATCH_HDR_LEN; - match->req_recv.req_bytes_packed = bytes_received; - - MCA_PML_CSUM_RECV_REQUEST_MATCHED(match, hdr); - if(match->req_bytes_expected > 0) { - struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; - uint32_t iov_count = 1; - - /* - * Make user buffer accessable(defined) before unpacking. 
- */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - match->req_recv.req_base.req_addr, - match->req_recv.req_base.req_count, - match->req_recv.req_base.req_datatype); - ); - - iov[0].iov_len = bytes_received; - iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval + - OMPI_PML_CSUM_MATCH_HDR_LEN); - while (iov_count < num_segments) { - bytes_received += segments[iov_count].seg_len; - iov[iov_count].iov_len = segments[iov_count].seg_len; - iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval); - iov_count++; - } - opal_convertor_unpack( &match->req_recv.req_base.req_convertor, - iov, - &iov_count, - &bytes_received ); - match->req_bytes_received = bytes_received; - /* - * Unpacking finished, make the user buffer unaccessable again. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - match->req_recv.req_base.req_addr, - match->req_recv.req_base.req_count, - match->req_recv.req_base.req_datatype); - ); - } - if (bytes_received > 0) { - csum_data = match->req_recv.req_base.req_convertor.checksum; - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_csum, csum_received, (unsigned long)bytes_received)); - - if (csum_data != hdr->hdr_csum) { - opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_csum, csum_data); - dump_csum_error_data(segments, num_segments); - ompi_rte_abort(-1,NULL); - } - } - - /* no need to check if complete we know we are.. */ - /* don't need a rmb as that is for checking */ - recv_request_pml_complete(match); - } - return; - - slow_path: - OPAL_THREAD_UNLOCK(&comm->matching_lock); - mca_pml_csum_recv_frag_match(btl, hdr, segments, - num_segments, MCA_PML_CSUM_HDR_TYPE_MATCH); -} - - -void mca_pml_csum_recv_frag_callback_rndv(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_dst; - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - uint16_t csum_received, csum; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { - return; - } - csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_RNDV); - - csum_received = hdr->hdr_common.hdr_csum; - hdr->hdr_common.hdr_csum = 0; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO; -#endif - csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t)); - hdr->hdr_common.hdr_csum = csum_received; - if (csum_received != csum) { - opal_output(0, "%s:%s:%d: Invalid \'rndv header\' - received csum:0x%04x != computed csum:0x%04x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); - dump_csum_error_data(segments, 1); - ompi_rte_abort(-1,NULL); - } - - mca_pml_csum_recv_frag_match(btl, &hdr->hdr_match, segments, - des->des_dst_cnt, MCA_PML_CSUM_HDR_TYPE_RNDV); - return; -} - -void mca_pml_csum_recv_frag_callback_rget(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_dst; - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { - return; - } - csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_RGET); - 
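The receive callbacks in this file share one verification idiom: the 16-bit header checksum arrives in hdr_common.hdr_csum, the field is zeroed, the checksum is recomputed over the header, the field is restored, and a mismatch is fatal (the offending segments are dumped and ompi_rte_abort() is called). The sketch below shows that verify-and-restore sequence in isolation; toy_csum16() and the other toy_* names are illustrative stand-ins, not the opal_csum16() routine or the component's types.

#include <stdint.h>
#include <stdio.h>

/* Toy additive 16-bit checksum; a stand-in for opal_csum16(), not the real routine. */
static uint16_t toy_csum16(const void *buf, size_t len)
{
    const uint8_t *p = (const uint8_t *)buf;
    uint32_t sum = 0;
    for (size_t i = 0; i < len; i++)
        sum += p[i];
    return (uint16_t)((sum & 0xffff) + (sum >> 16));
}

struct toy_common_hdr {        /* same shape as mca_pml_csum_common_hdr_t */
    uint8_t  hdr_type;
    uint8_t  hdr_flags;
    uint16_t hdr_csum;         /* checksum over the header, field zeroed while summing */
};

/* Sender: compute the header checksum with the csum field cleared. */
static void toy_seal_hdr(struct toy_common_hdr *hdr, size_t hdr_len)
{
    hdr->hdr_csum = 0;
    hdr->hdr_csum = toy_csum16(hdr, hdr_len);
}

/* Receiver: save the received value, recompute over a zeroed field,
 * restore the field, and report whether the two agree. */
static int toy_verify_hdr(struct toy_common_hdr *hdr, size_t hdr_len)
{
    uint16_t received = hdr->hdr_csum;
    hdr->hdr_csum = 0;
    uint16_t computed = toy_csum16(hdr, hdr_len);
    hdr->hdr_csum = received;          /* keep the wire value for later logging */
    return received == computed;
}

int main(void)
{
    struct toy_common_hdr hdr = { .hdr_type = 1, .hdr_flags = 0 };
    toy_seal_hdr(&hdr, sizeof(hdr));
    printf("clean header ok: %d\n", toy_verify_hdr(&hdr, sizeof(hdr)));
    hdr.hdr_flags ^= 0x10;             /* simulate corruption in flight */
    printf("corrupted header ok: %d\n", toy_verify_hdr(&hdr, sizeof(hdr)));
    return 0;
}

Note that the rget callback just above, unlike the match, rndv, ack, frag, put and fin callbacks, goes straight from csum_hdr_ntoh() to matching without this header check.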
mca_pml_csum_recv_frag_match(btl, &hdr->hdr_match, segments, - des->des_dst_cnt, MCA_PML_CSUM_HDR_TYPE_RGET); - return; -} - - - -void mca_pml_csum_recv_frag_callback_ack(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) -{ - mca_btl_base_segment_t* segments = des->des_dst; - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - mca_pml_csum_send_request_t* sendreq; - uint16_t csum_received, csum; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { - return; - } - - csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_ACK); - - csum_received = hdr->hdr_common.hdr_csum; - hdr->hdr_common.hdr_csum = 0; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO; -#endif - csum = opal_csum16(hdr, sizeof(mca_pml_csum_ack_hdr_t)); - hdr->hdr_common.hdr_csum = csum_received; - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Received \'ACK\' with header csum:0x%04x\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum)); - if (csum_received != csum) { - opal_output(0, "%s:%s:%d: Invalid \'ACK header\' - received csum:0x%04x != computed csum:0x%04x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); - dump_csum_error_data(segments, 1); - ompi_rte_abort(-1,NULL); - } - - sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_ack.hdr_src_req.pval; - sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; - - /* if the request should be delivered entirely by copy in/out - * then throttle sends */ - if(hdr->hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA) - sendreq->req_throttle_sends = true; - - mca_pml_csum_send_request_copy_in_out(sendreq, - hdr->hdr_ack.hdr_send_offset, - sendreq->req_send.req_bytes_packed - - hdr->hdr_ack.hdr_send_offset); - - if (sendreq->req_state != 0) { - /* Typical receipt of an ACK message causes req_state to be - * decremented. However, a send request that started as an - * RGET request can become a RNDV. For example, when the - * receiver determines that its receive buffer is not - * contiguous and therefore cannot support the RGET - * protocol. A send request that started with the RGET - * protocol has req_state == 0 and as such should not be - * decremented. 
- */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); - } - - if(send_request_pml_complete_check(sendreq) == false) - mca_pml_csum_send_request_schedule(sendreq); - - return; -} - -void mca_pml_csum_recv_frag_callback_frag(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_dst; - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - mca_pml_csum_recv_request_t* recvreq; - uint16_t csum_received, csum; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { - return; - } - csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FRAG); - - csum_received = hdr->hdr_common.hdr_csum; - hdr->hdr_common.hdr_csum = 0; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO; -#endif - csum = opal_csum16(hdr, sizeof(mca_pml_csum_frag_hdr_t)); - hdr->hdr_common.hdr_csum = csum_received; - if(csum_received != csum) { - opal_output(0, "%s:%s:%d: Invalid \'frag header\' - received csum:0x%04x != computed csum:0x%04x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); - dump_csum_error_data(segments, 1); - ompi_rte_abort(-1,NULL); - } - - recvreq = (mca_pml_csum_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval; - mca_pml_csum_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt); - - return; -} - - -void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_dst; - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - mca_pml_csum_send_request_t* sendreq; - uint16_t csum_received, csum; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { - return; - } - - csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_PUT); - - csum_received = hdr->hdr_common.hdr_csum; - hdr->hdr_common.hdr_csum = 0; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO; -#endif - csum = opal_csum16(hdr, sizeof(mca_pml_csum_rdma_hdr_t)); - hdr->hdr_common.hdr_csum = csum_received; - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Received \'PUT\' with header csum:0x%04x\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum)); - if(csum_received != csum) { - opal_output(0, "%s:%s:%d: Invalid \'PUT header\' - received csum:0x%04x != computed csum:0x%04x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); - dump_csum_error_data(segments, 1); - ompi_rte_abort(-1,NULL); - } - - sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_rdma.hdr_req.pval; - mca_pml_csum_send_request_put(sendreq,btl,&hdr->hdr_rdma); - - return; -} - - -void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* des, - void* cbdata ) { - mca_btl_base_segment_t* segments = des->des_dst; - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - mca_btl_base_descriptor_t* rdma; - uint16_t csum_received, csum; - - if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) { - return; - } - - csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FIN); - - csum_received = hdr->hdr_common.hdr_csum; - hdr->hdr_common.hdr_csum = 0; -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO; -#endif - csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t)); - hdr->hdr_common.hdr_csum = csum_received; - 
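A second convention runs through the ack, frag and put callbacks above and the fin callback this code belongs to: request and descriptor pointers (hdr_src_req, hdr_dst_req, hdr_des) travel inside headers as opaque ompi_ptr_t values, are echoed back verbatim by the peer, and are only ever dereferenced by the process that stored them, which is why they are never byte-swapped. A minimal sketch of that round trip, using hypothetical toy_* names rather than the component's own structures:

#include <stdint.h>
#include <stdio.h>

/* Toy version of ompi_ptr_t: raw pointer bits carried inside a header and
 * only ever dereferenced by the process that originally stored them. */
typedef union {
    void     *pval;
    uint64_t  lval;
} toy_ptr_t;

struct toy_send_request { int completed; };

/* Sender: stash the local request pointer in the outgoing rendezvous header. */
static void toy_fill_src_req(toy_ptr_t *hdr_src_req, struct toy_send_request *req)
{
    hdr_src_req->pval = req;    /* no hton conversion: the peer never interprets it */
}

/* Receiver: copy the value, untouched, into the ACK it sends back. */
static toy_ptr_t toy_build_ack(toy_ptr_t hdr_src_req)
{
    return hdr_src_req;         /* same bits, still the sender's pointer */
}

/* Sender again: the returned bits are a valid local pointer once more. */
static void toy_handle_ack(toy_ptr_t ack_src_req)
{
    struct toy_send_request *req = (struct toy_send_request *)ack_src_req.pval;
    req->completed = 1;
}

int main(void)
{
    struct toy_send_request req = { 0 };
    toy_ptr_t hdr;
    toy_fill_src_req(&hdr, &req);
    toy_handle_ack(toy_build_ack(hdr));
    printf("completed = %d\n", req.completed);   /* prints 1 */
    return 0;
}

The same convention lets the fin handler below recover the completed RDMA descriptor directly from hdr_fin.hdr_des and invoke its des_cbfunc with success or failure.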
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Received \'FIN\' with header csum:0x%04x\n",OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),csum)); - if(csum_received != csum) { - opal_output(0, "%s:%s:%d: Invalid \'FIN header\' - received csum:0x%04x != computed csum:0x%04x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum); - dump_csum_error_data(segments, 1); - ompi_rte_abort(-1,NULL); - } - - rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval; - rdma->des_cbfunc(btl, NULL, rdma, - hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS); - - return; -} - - - -#define PML_MAX_SEQ ~((mca_pml_sequence_t)0); - -static inline mca_pml_csum_recv_request_t* get_posted_recv(opal_list_t *queue) -{ - if(opal_list_get_size(queue) == 0) - return NULL; - - return (mca_pml_csum_recv_request_t*)opal_list_get_first(queue); -} - -static inline mca_pml_csum_recv_request_t* get_next_posted_recv( - opal_list_t *queue, - mca_pml_csum_recv_request_t* req) -{ - opal_list_item_t *i = opal_list_get_next((opal_list_item_t*)req); - - if(opal_list_get_end(queue) == i) - return NULL; - - return (mca_pml_csum_recv_request_t*)i; -} - -static mca_pml_csum_recv_request_t *match_incomming( - mca_pml_csum_match_hdr_t *hdr, mca_pml_csum_comm_t *comm, - mca_pml_csum_comm_proc_t *proc) -{ - mca_pml_csum_recv_request_t *specific_recv, *wild_recv; - mca_pml_sequence_t wild_recv_seq, specific_recv_seq; - int tag = hdr->hdr_tag; - - specific_recv = get_posted_recv(&proc->specific_receives); - wild_recv = get_posted_recv(&comm->wild_receives); - - wild_recv_seq = wild_recv ? - wild_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ; - specific_recv_seq = specific_recv ? - specific_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ; - - /* they are equal only if both are PML_MAX_SEQ */ - while(wild_recv_seq != specific_recv_seq) { - mca_pml_csum_recv_request_t **match; - opal_list_t *queue; - int req_tag; - mca_pml_sequence_t *seq; - - if (OPAL_UNLIKELY(wild_recv_seq < specific_recv_seq)) { - match = &wild_recv; - queue = &comm->wild_receives; - seq = &wild_recv_seq; - } else { - match = &specific_recv; - queue = &proc->specific_receives; - seq = &specific_recv_seq; - } - - req_tag = (*match)->req_recv.req_base.req_tag; - if(req_tag == tag || (req_tag == OMPI_ANY_TAG && tag >= 0)) { - opal_list_remove_item(queue, (opal_list_item_t*)(*match)); - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, - &((*match)->req_recv.req_base), PERUSE_RECV); - return *match; - } - - *match = get_next_posted_recv(queue, *match); - *seq = (*match) ? 
(*match)->req_recv.req_base.req_sequence : PML_MAX_SEQ; - } - - return NULL; -} - -static mca_pml_csum_recv_request_t* -match_one(mca_btl_base_module_t *btl, - mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments, - size_t num_segments, ompi_communicator_t *comm_ptr, - mca_pml_csum_comm_proc_t *proc, - mca_pml_csum_recv_frag_t* frag) -{ - mca_pml_csum_recv_request_t *match; - mca_pml_csum_comm_t *comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm; - - do { - match = match_incomming(hdr, comm, proc); - - /* if match found, process data */ - if(OPAL_LIKELY(NULL != match)) { - match->req_recv.req_base.req_proc = proc->ompi_proc; - - if(OPAL_UNLIKELY(MCA_PML_REQUEST_PROBE == match->req_recv.req_base.req_type)) { - /* complete the probe */ - mca_pml_csum_recv_request_matched_probe(match, btl, segments, - num_segments); - /* attempt to match actual request */ - continue; - } - - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_MSG_MATCH_POSTED_REQ, - &(match->req_recv.req_base), PERUSE_RECV); - return match; - } - - /* if no match found, place on unexpected queue */ - append_frag_to_list(&proc->unexpected_frags, btl, hdr, segments, - num_segments, frag); - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - return NULL; - } while(true); -} - -static mca_pml_csum_recv_frag_t* check_cantmatch_for_match(mca_pml_csum_comm_proc_t *proc) -{ - mca_pml_csum_recv_frag_t *frag; - - /* search the list for a fragment from the send with sequence - * number next_msg_seq_expected - */ - for(frag = (mca_pml_csum_recv_frag_t*)opal_list_get_first(&proc->frags_cant_match); - frag != (mca_pml_csum_recv_frag_t*)opal_list_get_end(&proc->frags_cant_match); - frag = (mca_pml_csum_recv_frag_t*)opal_list_get_next(frag)) - { - mca_pml_csum_match_hdr_t* hdr = &frag->hdr.hdr_match; - /* - * If the message has the next expected seq from that proc... - */ - if(hdr->hdr_seq != proc->expected_sequence) - continue; - - opal_list_remove_item(&proc->frags_cant_match, (opal_list_item_t*)frag); - return frag; - } - - return NULL; -} - -/** - * RCS/CTS receive side matching - * - * @param hdr list of parameters needed for matching - * This list is also embeded in frag, - * but this allows to save a memory copy when - * a match is made in this routine. (IN) - * @param frag pointer to receive fragment which we want - * to match (IN/OUT). If a match is not made, - * hdr is copied to frag. - * @param match_made parameter indicating if we matched frag/ - * hdr (OUT) - * @param additional_matches if a match is made with frag, we - * may be able to match fragments that previously - * have arrived out-of-order. If this is the - * case, the associated fragment descriptors are - * put on this list for further processing. (OUT) - * - * @return OMPI error code - * - * This routine is used to try and match a newly arrived message fragment - * to pre-posted receives. The following assumptions are made - * - fragments are received out of order - * - for long messages, e.g. more than one fragment, a RTS/CTS algorithm - * is used. 
- * - 2nd and greater fragments include a receive descriptor pointer - * - fragments may be dropped - * - fragments may be corrupt - * - this routine may be called simultaneously by more than one thread - */ -static int mca_pml_csum_recv_frag_match( mca_btl_base_module_t *btl, - mca_pml_csum_match_hdr_t *hdr, - mca_btl_base_segment_t* segments, - size_t num_segments, - int type) -{ - /* local variables */ - uint16_t next_msg_seq_expected, frag_msg_seq; - ompi_communicator_t *comm_ptr; - mca_pml_csum_recv_request_t *match = NULL; - mca_pml_csum_comm_t *comm; - mca_pml_csum_comm_proc_t *proc; - mca_pml_csum_recv_frag_t* frag = NULL; - - /* communicator pointer */ - comm_ptr = ompi_comm_lookup(hdr->hdr_ctx); - if(OPAL_UNLIKELY(NULL == comm_ptr)) { - /* This is a special case. A message for a not yet existing - * communicator can happens. Instead of doing a matching we - * will temporarily add it the a pending queue in the PML. - * Later on, when the communicator is completely instantiated, - * this pending queue will be searched and all matching fragments - * moved to the right communicator. - */ - append_frag_to_list( &mca_pml_csum.non_existing_communicator_pending, - btl, hdr, segments, num_segments, NULL ); - return OMPI_SUCCESS; - } - comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm; - - /* source sequence number */ - frag_msg_seq = hdr->hdr_seq; - proc = &comm->procs[hdr->hdr_src]; - - /** - * We generate the MSG_ARRIVED event as soon as the PML is aware of a matching - * fragment arrival. Independing if it is received on the correct order or not. - * This will allow the tools to figure out if the messages are not received in the - * correct order (if multiple network interfaces). - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* get next expected message sequence number - if threaded - * run, lock to make sure that if another thread is processing - * a frag from the same message a match is made only once. - * Also, this prevents other posted receives (for a pair of - * end points) from being processed, and potentially "loosing" - * the fragment. - */ - OPAL_THREAD_LOCK(&comm->matching_lock); - - /* get sequence number of next message that can be processed */ - next_msg_seq_expected = (uint16_t)proc->expected_sequence; - if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected)) - goto wrong_seq; - - /* - * This is the sequence number we were expecting, - * so we can try matching it to already posted - * receives. - */ - -out_of_order_match: - /* We're now expecting the next sequence number. */ - proc->expected_sequence++; - - /** - * We generate the SEARCH_POSTED_QUEUE only when the message is received - * in the correct sequence. Otherwise, we delay the event generation until - * we reach the correct sequence number. - */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag); - - /** - * The match is over. We generate the SEARCH_POSTED_Q_END here, before going - * into the mca_pml_csum_check_cantmatch_for_match so we can make a difference - * for the searching time for all messages. 
- */ - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr, - hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV); - - /* release matching lock before processing fragment */ - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - if(OPAL_LIKELY(match)) { - switch(type) { - case MCA_PML_CSUM_HDR_TYPE_MATCH: - mca_pml_csum_recv_request_progress_match(match, btl, segments, num_segments); - break; - case MCA_PML_CSUM_HDR_TYPE_RNDV: - mca_pml_csum_recv_request_progress_rndv(match, btl, segments, num_segments); - break; - case MCA_PML_CSUM_HDR_TYPE_RGET: - mca_pml_csum_recv_request_progress_rget(match, btl, segments, num_segments); - break; - } - - if(OPAL_UNLIKELY(frag)) - MCA_PML_CSUM_RECV_FRAG_RETURN(frag); - } - - /* - * Now that new message has arrived, check to see if - * any fragments on the c_c_frags_cant_match list - * may now be used to form new matchs - */ - if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) { - OPAL_THREAD_LOCK(&comm->matching_lock); - if((frag = check_cantmatch_for_match(proc))) { - hdr = &frag->hdr.hdr_match; - segments = frag->segments; - num_segments = frag->num_segments; - btl = frag->btl; - type = hdr->hdr_common.hdr_type; - goto out_of_order_match; - } - OPAL_THREAD_UNLOCK(&comm->matching_lock); - } - - return OMPI_SUCCESS; -wrong_seq: - /* - * This message comes after the next expected, so it - * is ahead of sequence. Save it for later. - */ - append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments, - num_segments, NULL); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - return OMPI_SUCCESS; -} diff --git a/ompi/mca/pml/csum/pml_csum_recvfrag.h b/ompi/mca/pml/csum/pml_csum_recvfrag.h deleted file mode 100644 index eab63c3ed5..0000000000 --- a/ompi/mca/pml/csum/pml_csum_recvfrag.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ - -#ifndef MCA_PML_CSUM_RECVFRAG_H -#define MCA_PML_CSUM_RECVFRAG_H - -#include "ompi/mca/btl/btl.h" -#include "pml_csum_hdr.h" - -BEGIN_C_DECLS - -struct mca_pml_csum_buffer_t { - size_t len; - void * addr; -}; -typedef struct mca_pml_csum_buffer_t mca_pml_csum_buffer_t; - - -struct mca_pml_csum_recv_frag_t { - ompi_free_list_item_t super; - mca_pml_csum_hdr_t hdr; - size_t num_segments; - mca_btl_base_module_t* btl; - mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS]; - mca_pml_csum_buffer_t buffers[MCA_BTL_DES_MAX_SEGMENTS]; - unsigned char addr[1]; -}; -typedef struct mca_pml_csum_recv_frag_t mca_pml_csum_recv_frag_t; - -OBJ_CLASS_DECLARATION(mca_pml_csum_recv_frag_t); - - -#define MCA_PML_CSUM_RECV_FRAG_ALLOC(frag,rc) \ -do { \ - ompi_free_list_item_t* item; \ - OMPI_FREE_LIST_WAIT(&mca_pml_csum.recv_frags, item, rc); \ - frag = (mca_pml_csum_recv_frag_t*)item; \ -} while(0) - - -#define MCA_PML_CSUM_RECV_FRAG_INIT(frag, hdr, segs, cnt, btl ) \ -do { \ - size_t i, _size; \ - mca_btl_base_segment_t* macro_segments = frag->segments; \ - mca_pml_csum_buffer_t* buffers = frag->buffers; \ - unsigned char* _ptr = (unsigned char*)frag->addr; \ - /* init recv_frag */ \ - frag->btl = btl; \ - frag->hdr = *(mca_pml_csum_hdr_t*)hdr; \ - frag->num_segments = 1; \ - _size = segs[0].seg_len; \ - for( i = 1; i < cnt; i++ ) { \ - _size += segs[i].seg_len; \ - } \ - /* copy over data */ \ - if(_size <= mca_pml_csum.unexpected_limit ) { \ - macro_segments[0].seg_addr.pval = frag->addr; \ - } else { \ - buffers[0].len = _size; \ - buffers[0].addr = (char*) \ - mca_pml_csum.allocator->alc_alloc( mca_pml_csum.allocator, \ - buffers[0].len, \ - 0, NULL); \ - _ptr = (unsigned char*)(buffers[0].addr); \ - macro_segments[0].seg_addr.pval = buffers[0].addr; \ - } \ - macro_segments[0].seg_len = _size; \ - for( i = 0; i < cnt; i++ ) { \ - memcpy( _ptr, segs[i].seg_addr.pval, segs[i].seg_len); \ - _ptr += segs[i].seg_len; \ - } \ - } while(0) - - -#define MCA_PML_CSUM_RECV_FRAG_RETURN(frag) \ -do { \ - if( frag->segments[0].seg_len > mca_pml_csum.unexpected_limit ) { \ - /* return buffers */ \ - mca_pml_csum.allocator->alc_free( mca_pml_csum.allocator, \ - frag->buffers[0].addr ); \ - } \ - frag->num_segments = 0; \ - \ - /* return recv_frag */ \ - OMPI_FREE_LIST_RETURN(&mca_pml_csum.recv_frags, \ - (ompi_free_list_item_t*)frag); \ - } while(0) - - -/** - * Callback from BTL on receipt of a recv_frag (match). - */ - -extern void mca_pml_csum_recv_frag_callback_match( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -/** - * Callback from BTL on receipt of a recv_frag (rndv). - */ - -extern void mca_pml_csum_recv_frag_callback_rndv( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (rget). - */ - -extern void mca_pml_csum_recv_frag_callback_rget( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - -/** - * Callback from BTL on receipt of a recv_frag (ack). - */ - -extern void mca_pml_csum_recv_frag_callback_ack( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (frag). 
- */ - -extern void mca_pml_csum_recv_frag_callback_frag( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (put). - */ - -extern void mca_pml_csum_recv_frag_callback_put( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); -/** - * Callback from BTL on receipt of a recv_frag (fin). - */ - -extern void mca_pml_csum_recv_frag_callback_fin( mca_btl_base_module_t *btl, - mca_btl_base_tag_t tag, - mca_btl_base_descriptor_t* descriptor, - void* cbdata ); - - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/csum/pml_csum_recvreq.c b/ompi/mca/pml/csum/pml_csum_recvreq.c deleted file mode 100644 index a85356cb2b..0000000000 --- a/ompi/mca/pml/csum/pml_csum_recvreq.c +++ /dev/null @@ -1,1092 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/util/arch.h" -#include "opal/util/crc.h" -#include "opal/util/output.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/bml/bml.h" -#include "ompi/mca/btl/btl.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/memchecker.h" -#include "ompi/mca/pml/base/base.h" - -#include "pml_csum_comm.h" -#include "pml_csum_recvreq.h" -#include "pml_csum_recvfrag.h" -#include "pml_csum_sendreq.h" -#include "pml_csum_rdmafrag.h" - -/** - * Dump data elements that caused a checksum violation - */ -static void dump_csum_error_data(mca_btl_base_segment_t* segments, size_t num_segments) -{ - size_t i, j; - uint8_t *data; - - printf("CHECKSUM ERROR DATA\n"); - for (i = 0; i < num_segments; ++i) { - printf("Segment %lu", (unsigned long)i); - data = (uint8_t*)segments[i].seg_addr.pval; - for (j=0; j < segments[i].seg_len; j++) { - if (0 == (j % 40)) { - printf("\n"); - } - printf("%02x ", data[j]); - }; - } - printf("\nEND CHECKSUM ERROR DATA\n\n"); -} - -void mca_pml_csum_recv_request_process_pending(void) -{ - mca_pml_csum_recv_request_t* recvreq; - int rc, i, s = (int)opal_list_get_size(&mca_pml_csum.recv_pending); - - for(i = 0; i < s; i++) { - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - recvreq = (mca_pml_csum_recv_request_t*) - opal_list_remove_first(&mca_pml_csum.recv_pending); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - if( OPAL_UNLIKELY(NULL == recvreq) ) - break; - recvreq->req_pending = false; - rc = mca_pml_csum_recv_request_schedule_exclusive(recvreq, NULL); - if(OMPI_ERR_OUT_OF_RESOURCE == rc) - break; - } -} - -static int mca_pml_csum_recv_request_free(struct ompi_request_t** request) -{ - mca_pml_csum_recv_request_t* recvreq = *(mca_pml_csum_recv_request_t**)request; - - assert( false == recvreq->req_recv.req_base.req_free_called ); - - OPAL_THREAD_LOCK(&ompi_request_lock); - recvreq->req_recv.req_base.req_free_called = true; - - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, - &(recvreq->req_recv.req_base), PERUSE_RECV ); - - if( true == recvreq->req_recv.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - MCA_PML_CSUM_RECV_REQUEST_RETURN( recvreq ); - } - - OPAL_THREAD_UNLOCK(&ompi_request_lock); - *request = MPI_REQUEST_NULL; - return OMPI_SUCCESS; -} - -static int mca_pml_csum_recv_request_cancel(struct ompi_request_t* ompi_request, int complete) -{ - mca_pml_csum_recv_request_t* request = (mca_pml_csum_recv_request_t*)ompi_request; - mca_pml_csum_comm_t* comm = request->req_recv.req_base.req_comm->c_pml_comm; - - if( true == ompi_request->req_complete ) { /* way to late to cancel this one */ - /* - * Receive request completed, make user buffer accessable. 
- */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - request->req_recv.req_base.req_addr, - request->req_recv.req_base.req_count, - request->req_recv.req_base.req_datatype); - ); - return OMPI_SUCCESS; - } - - /* The rest should be protected behind the match logic lock */ - OPAL_THREAD_LOCK(&comm->matching_lock); - if( OMPI_ANY_TAG == ompi_request->req_status.MPI_TAG ) { /* the match has not been already done */ - if( request->req_recv.req_base.req_peer == OMPI_ANY_SOURCE ) { - opal_list_remove_item( &comm->wild_receives, (opal_list_item_t*)request ); - } else { - mca_pml_csum_comm_proc_t* proc = comm->procs + request->req_recv.req_base.req_peer; - opal_list_remove_item(&proc->specific_receives, (opal_list_item_t*)request); - } - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q, - &(request->req_recv.req_base), PERUSE_RECV ); - /** - * As now the PML is done with this request we have to force the pml_complete - * to true. Otherwise, the request will never be freed. - */ - request->req_recv.req_base.req_pml_complete = true; - } - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - OPAL_THREAD_LOCK(&ompi_request_lock); - ompi_request->req_status._cancelled = true; - /* This macro will set the req_complete to true so the MPI Test/Wait* functions - * on this request will be able to complete. As the status is marked as - * cancelled the cancel state will be detected. - */ - MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE(request); - OPAL_THREAD_UNLOCK(&ompi_request_lock); - /* - * Receive request cancelled, make user buffer accessable. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - request->req_recv.req_base.req_addr, - request->req_recv.req_base.req_count, - request->req_recv.req_base.req_datatype); - ); - return OMPI_SUCCESS; -} - -static void mca_pml_csum_recv_request_construct(mca_pml_csum_recv_request_t* request) -{ - request->req_recv.req_base.req_type = MCA_PML_REQUEST_RECV; - request->req_recv.req_base.req_ompi.req_free = mca_pml_csum_recv_request_free; - request->req_recv.req_base.req_ompi.req_cancel = mca_pml_csum_recv_request_cancel; - request->req_rdma_cnt = 0; - OBJ_CONSTRUCT(&request->lock, opal_mutex_t); -} - -OBJ_CLASS_INSTANCE( - mca_pml_csum_recv_request_t, - mca_pml_base_recv_request_t, - mca_pml_csum_recv_request_construct, - NULL); - - -/* - * Release resources. 
- */ - -static void mca_pml_csum_recv_ctl_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - -/* - * Put operation has completed remotely - update request status - */ - -static void mca_pml_csum_put_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - mca_pml_csum_recv_request_t* recvreq = (mca_pml_csum_recv_request_t*)des->des_cbdata; - size_t bytes_received = 0; - - if( OPAL_LIKELY(status == OMPI_SUCCESS) ) { - bytes_received = mca_pml_csum_compute_segment_length (btl->btl_seg_size, - (void *) des->des_dst, - des->des_dst_cnt, 0); - } - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1); - - mca_bml_base_free(bml_btl, des); - - /* check completion status */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_csum_recv_request_schedule(recvreq, bml_btl); - } - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - -/* - * - */ - -int mca_pml_csum_recv_request_ack_send_btl( - ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl, - uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset, - bool nordma) -{ - mca_btl_base_descriptor_t* des; - mca_pml_csum_ack_hdr_t* ack; - int rc; - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_csum_ack_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* fill out header */ - ack = (mca_pml_csum_ack_hdr_t*)des->des_src->seg_addr.pval; - ack->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_ACK; - ack->hdr_common.hdr_flags = nordma ? MCA_PML_CSUM_HDR_FLAGS_NORDMA : 0; - ack->hdr_common.hdr_csum = 0; - ack->hdr_src_req.lval = hdr_src_req; - ack->hdr_dst_req.pval = hdr_dst_req; - ack->hdr_send_offset = hdr_send_offset; - ack->hdr_common.hdr_csum = opal_csum16(ack, sizeof(mca_pml_csum_ack_hdr_t)); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Sending \'ACK\' with header csum:0x%04x\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), ack->hdr_common.hdr_csum)); - - csum_hdr_hton(ack, MCA_PML_CSUM_HDR_TYPE_ACK, proc); - - /* initialize descriptor */ - des->des_cbfunc = mca_pml_csum_recv_ctl_completion; - - rc = mca_bml_base_send(bml_btl, des, MCA_PML_CSUM_HDR_TYPE_ACK); - if( OPAL_LIKELY( rc >= 0 ) ) { - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des); - return OMPI_ERR_OUT_OF_RESOURCE; -} - -static int mca_pml_csum_recv_request_ack( - mca_pml_csum_recv_request_t* recvreq, - mca_pml_csum_rendezvous_hdr_t* hdr, - size_t bytes_received) -{ - ompi_proc_t* proc = (ompi_proc_t*)recvreq->req_recv.req_base.req_proc; - mca_bml_base_endpoint_t* bml_endpoint = NULL; - - bml_endpoint = (mca_bml_base_endpoint_t*) proc->proc_bml; - - /* by default copy everything */ - recvreq->req_send_offset = bytes_received; - if(hdr->hdr_msg_length > bytes_received) { - size_t rdma_num = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma); - /* - * lookup request buffer to determine if memory is already - * registered. 
- */ - - if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == 0 && - hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_CONTIG && - rdma_num != 0) { - unsigned char *base; - opal_convertor_get_current_pointer( &recvreq->req_recv.req_base.req_convertor, (void**)&(base) ); - - if(hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_PIN) - recvreq->req_rdma_cnt = mca_pml_csum_rdma_btls(bml_endpoint, - base, recvreq->req_recv.req_bytes_packed, - recvreq->req_rdma ); - else - recvreq->req_rdma_cnt = 0; - - /* memory is already registered on both sides */ - if (recvreq->req_rdma_cnt != 0) { - recvreq->req_send_offset = hdr->hdr_msg_length; - /* are rdma devices available for long rdma protocol */ - } else if(bml_endpoint->btl_send_limit < hdr->hdr_msg_length) { - /* use convertor to figure out the rdma offset for this request */ - recvreq->req_send_offset = hdr->hdr_msg_length - - bml_endpoint->btl_pipeline_send_length; - - if(recvreq->req_send_offset < bytes_received) - recvreq->req_send_offset = bytes_received; - - /* use converter to figure out the rdma offset for this - * request */ - opal_convertor_set_position(&recvreq->req_recv.req_base.req_convertor, - &recvreq->req_send_offset); - - recvreq->req_rdma_cnt = - mca_pml_csum_rdma_pipeline_btls(bml_endpoint, - recvreq->req_send_offset - bytes_received, - recvreq->req_rdma); - } - } - /* nothing to send by copy in/out - no need to ack */ - if(recvreq->req_send_offset == hdr->hdr_msg_length) - return OMPI_SUCCESS; - } - /* let know to shedule function there is no need to put ACK flag */ - recvreq->req_ack_sent = true; - return mca_pml_csum_recv_request_ack_send(proc, hdr->hdr_src_req.lval, - recvreq, recvreq->req_send_offset, - recvreq->req_send_offset == bytes_received); -} - -/** - * Return resources used by the RDMA - */ - -static void mca_pml_csum_rget_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - mca_pml_csum_rdma_frag_t* frag = (mca_pml_csum_rdma_frag_t*)des->des_cbdata; - mca_pml_csum_recv_request_t* recvreq = (mca_pml_csum_recv_request_t*)frag->rdma_req; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { - /* TSW - FIX */ - OMPI_ERROR_LOG(status); - ompi_rte_abort(-1, NULL); - } - - mca_pml_csum_send_fin(recvreq->req_recv.req_base.req_proc, - bml_btl, - frag->rdma_hdr.hdr_rget.hdr_des, - des->order, 0); - - /* is receive request complete */ - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length); - recv_request_pml_complete_check(recvreq); - - MCA_PML_CSUM_RDMA_FRAG_RETURN(frag); - - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - - -/* - * - */ -int mca_pml_csum_recv_request_get_frag( mca_pml_csum_rdma_frag_t* frag ) -{ - mca_pml_csum_recv_request_t* recvreq = (mca_pml_csum_recv_request_t*)frag->rdma_req; - mca_bml_base_btl_t* bml_btl = frag->rdma_bml; - mca_btl_base_descriptor_t* descriptor; - size_t save_size = frag->rdma_length; - int rc; - - /* prepare descriptor */ - mca_bml_base_prepare_dst( bml_btl, - NULL, - &recvreq->req_recv.req_base.req_convertor, - MCA_BTL_NO_ORDER, - 0, - &frag->rdma_length, - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK | - MCA_BTL_DES_FLAGS_GET, - &descriptor ); - if( OPAL_UNLIKELY(NULL == descriptor) ) { - frag->rdma_length = save_size; - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - opal_list_append(&mca_pml_csum.rdma_pending, 
(opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - - descriptor->des_src = (mca_btl_base_segment_t *) frag->rdma_segs; - descriptor->des_src_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt; - descriptor->des_cbfunc = mca_pml_csum_rget_completion; - descriptor->des_cbdata = frag; - - PERUSE_TRACE_COMM_OMPI_EVENT(PERUSE_COMM_REQ_XFER_CONTINUE, - &(recvreq->req_recv.req_base), - frag->rdma_length, PERUSE_RECV); - - /* queue up get request */ - rc = mca_bml_base_get(bml_btl,descriptor); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - mca_bml_base_free(bml_btl, descriptor); - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - opal_list_append(&mca_pml_csum.rdma_pending, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } else { - OMPI_ERROR_LOG(rc); - ompi_rte_abort(-1, NULL); - } - } - - return OMPI_SUCCESS; -} - - - - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. - */ - -void mca_pml_csum_recv_request_progress_frag( mca_pml_csum_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received, data_offset = 0; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_CSUM_RECV_REQUEST_UNPACK */ - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - uint32_t csum = OPAL_CSUM_ZERO; - - bytes_received = mca_pml_csum_compute_segment_length_base (segments, num_segments, - sizeof(mca_pml_csum_frag_hdr_t)); - data_offset = hdr->hdr_frag.hdr_frag_offset; - /* - * Make user buffer accessable(defined) before unpacking. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_CSUM_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - sizeof(mca_pml_csum_frag_hdr_t), - data_offset, - bytes_received, - bytes_delivered ); - /* - * Unpacking finished, make the user buffer unaccessable again. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - if (bytes_received > 0) { - csum = recvreq->req_recv.req_base.req_convertor.checksum; - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Received \'frag\' with data csum:0x%x, frag csum:0x%04x, size:%lu\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum, hdr->hdr_frag.hdr_csum, (unsigned long)bytes_received)); - if(csum != hdr->hdr_frag.hdr_csum) { - opal_output(0, "%s:%s:%d: Invalid \'frag data\' - received csum:0x%x != computed csum:0x%x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_frag.hdr_csum, csum); - dump_csum_error_data(segments, num_segments); - ompi_rte_abort(-1,NULL); - } - } - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); - /* check completion status */ - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_csum_recv_request_schedule(recvreq, NULL); - } -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. 
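The per-fragment data verification above follows one pattern throughout this PML: the convertor accumulates a checksum as a side effect of unpacking into the user buffer, and that value is compared against the checksum carried in the fragment header. Roughly, with names as in this file and the abort path condensed:

    uint32_t computed = recvreq->req_recv.req_base.req_convertor.checksum;
    if (OPAL_UNLIKELY(computed != hdr->hdr_frag.hdr_csum)) {
        // received data does not match what the sender checksummed: dump and abort
        dump_csum_error_data(segments, num_segments);
        ompi_rte_abort(-1, NULL);
    }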
- */ - -void mca_pml_csum_recv_request_progress_rget( mca_pml_csum_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - mca_pml_csum_rget_hdr_t* hdr = (mca_pml_csum_rget_hdr_t*)segments->seg_addr.pval; - mca_bml_base_endpoint_t* bml_endpoint = NULL; - mca_pml_csum_rdma_frag_t* frag; - size_t i, size = 0; - int rc; - - recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; - - MCA_PML_CSUM_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match); - - /* if receive buffer is not contiguous we can't just RDMA read into it, so - * fall back to copy in/out protocol. It is a pity because buffer on the - * sender side is already registered. We need to be smarter here, perhaps - * do couple of RDMA reads */ - if(opal_convertor_need_buffers(&recvreq->req_recv.req_base.req_convertor) == true) { - mca_pml_csum_recv_request_ack(recvreq, &hdr->hdr_rndv, 0); - return; - } - - MCA_PML_CSUM_RDMA_FRAG_ALLOC(frag,rc); - if( OPAL_UNLIKELY(NULL == frag) ) { - /* GLB - FIX */ - OMPI_ERROR_LOG(rc); - ompi_rte_abort(-1, NULL); - } - - /* lookup bml datastructures */ - bml_endpoint = (mca_bml_base_endpoint_t*)recvreq->req_recv.req_base.req_proc->proc_bml; - - assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs)); - - /* allocate/initialize a fragment */ - memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt); - - for(i = 0; i < hdr->hdr_seg_cnt; i++) { - mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *)(frag->rdma_segs + i * btl->btl_seg_size); - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if ((recvreq->req_recv.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) != - (ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) { - size += opal_swap_bytes4(seg->seg_len); - } else -#endif - { - size += seg->seg_len; - } - } - frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); - if( OPAL_UNLIKELY(NULL == frag->rdma_bml) ) { - opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - frag->rdma_hdr.hdr_rget = *hdr; - frag->rdma_req = recvreq; - frag->rdma_ep = bml_endpoint; - frag->rdma_length = size; - frag->rdma_state = MCA_PML_CSUM_RDMA_GET; - frag->reg = NULL; - - mca_pml_csum_recv_request_get_frag(frag); - return; -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. - */ - -void mca_pml_csum_recv_request_progress_rndv( mca_pml_csum_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_CSUM_RECV_REQUEST_UNPACK */ - size_t data_offset = 0; - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - uint32_t csum = OPAL_CSUM_ZERO; - - bytes_received = mca_pml_csum_compute_segment_length_base (segments, num_segments, - sizeof(mca_pml_csum_rendezvous_hdr_t)); - - recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length; - recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req; - recvreq->req_rdma_offset = bytes_received; - MCA_PML_CSUM_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match); - mca_pml_csum_recv_request_ack(recvreq, &hdr->hdr_rndv, bytes_received); - /** - * The PUT protocol do not attach any data to the original request. - * Therefore, we might want to avoid unpacking if there is nothing to - * unpack. 
- */ - if( 0 < bytes_received ) { - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_CSUM_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - sizeof(mca_pml_csum_rendezvous_hdr_t), - data_offset, - bytes_received, - bytes_delivered ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - csum = recvreq->req_recv.req_base.req_convertor.checksum; - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Received \'rndv\' with csum:0x%x, header csum:0x%04x, size:%lu\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum, hdr->hdr_match.hdr_csum, (unsigned long)bytes_received)); - if (csum != hdr->hdr_match.hdr_csum) { - opal_output(0, "%s:%s:%d: Invalid \'rndv data\' - received csum:0x%x != computed csum:0x%x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum); - dump_csum_error_data(segments, num_segments); - ompi_rte_abort(-1,NULL); - } - } - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received); - /* check completion status */ - if(recv_request_pml_complete_check(recvreq) == false && - recvreq->req_rdma_offset < recvreq->req_send_offset) { - /* schedule additional rdma operations */ - mca_pml_csum_recv_request_schedule(recvreq, NULL); - } -} - -/* - * Update the recv request status to reflect the number of bytes - * received and actually delivered to the application. - */ -void mca_pml_csum_recv_request_progress_match( mca_pml_csum_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_received, data_offset = 0; - size_t bytes_delivered __opal_attribute_unused__; /* is being set to zero in MCA_PML_CSUM_RECV_REQUEST_UNPACK */ - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - uint32_t csum = OPAL_CSUM_ZERO; - - bytes_received = mca_pml_csum_compute_segment_length_base (segments, num_segments, - OMPI_PML_CSUM_MATCH_HDR_LEN); - - recvreq->req_recv.req_bytes_packed = bytes_received; - - MCA_PML_CSUM_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_match); - /* - * Make user buffer accessable(defined) before unpacking. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - MCA_PML_CSUM_RECV_REQUEST_UNPACK( recvreq, - segments, - num_segments, - OMPI_PML_CSUM_MATCH_HDR_LEN, - data_offset, - bytes_received, - bytes_delivered); - if (bytes_received > 0) { - csum = recvreq->req_recv.req_base.req_convertor.checksum; - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Received \'match\' with csum:0x%x, header csum:0x%04x, size:%lu\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum, hdr->hdr_match.hdr_csum, (unsigned long)bytes_received)); - if (csum != hdr->hdr_match.hdr_csum) { - opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_match.hdr_csum, csum); - dump_csum_error_data(segments, num_segments); - ompi_rte_abort(-1,NULL); - } - } - - /* - * Unpacking finished, make the user buffer unaccessable again. 
- */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - recvreq->req_recv.req_base.req_addr, - recvreq->req_recv.req_base.req_count, - recvreq->req_recv.req_base.req_datatype); - ); - - /* - * No need for atomic here, as we know there is only one fragment - * for this request. - */ - recvreq->req_bytes_received += bytes_received; - recv_request_pml_complete(recvreq); -} - - -/** - * Handle completion of a probe request - */ - -void mca_pml_csum_recv_request_matched_probe( mca_pml_csum_recv_request_t* recvreq, - mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments ) -{ - size_t bytes_packed = 0; - mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval; - - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_CSUM_HDR_TYPE_MATCH: - bytes_packed = mca_pml_csum_compute_segment_length_base (segments, num_segments, - OMPI_PML_CSUM_MATCH_HDR_LEN); - break; - case MCA_PML_CSUM_HDR_TYPE_RNDV: - case MCA_PML_CSUM_HDR_TYPE_RGET: - bytes_packed = hdr->hdr_rndv.hdr_msg_length; - break; - } - - /* set completion status */ - recvreq->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_match.hdr_tag; - recvreq->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_match.hdr_src; - recvreq->req_bytes_received = bytes_packed; - recvreq->req_bytes_expected = bytes_packed; - recv_request_pml_complete(recvreq); -} - - -/* - * Schedule RDMA protocol. - * -*/ - -int mca_pml_csum_recv_request_schedule_once( mca_pml_csum_recv_request_t* recvreq, - mca_bml_base_btl_t *start_bml_btl ) -{ - mca_bml_base_btl_t* bml_btl; - int num_tries = recvreq->req_rdma_cnt, num_fail = 0; - size_t i, prev_bytes_remaining = 0; - size_t bytes_remaining = recvreq->req_send_offset - - recvreq->req_rdma_offset; - - /* if starting bml_btl is provided schedule next fragment on it first */ - if(start_bml_btl != NULL) { - for(i = 0; i < recvreq->req_rdma_cnt; i++) { - if(recvreq->req_rdma[i].bml_btl != start_bml_btl) - continue; - /* something left to be send? 
*/ - if( OPAL_LIKELY(recvreq->req_rdma[i].length) ) - recvreq->req_rdma_idx = i; - break; - } - } - - while(bytes_remaining > 0 && - recvreq->req_pipeline_depth < mca_pml_csum.recv_pipeline_depth) { - size_t size, seg_size; - mca_pml_csum_rdma_hdr_t* hdr; - mca_btl_base_descriptor_t* dst; - mca_btl_base_descriptor_t* ctl; - mca_mpool_base_registration_t * reg = NULL; - mca_btl_base_module_t* btl; - int rc, rdma_idx; - - if(prev_bytes_remaining == bytes_remaining) { - if(++num_fail == num_tries) { - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - if(false == recvreq->req_pending) { - opal_list_append(&mca_pml_csum.recv_pending, - (opal_list_item_t*)recvreq); - recvreq->req_pending = true; - } - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } - } else { - num_fail = 0; - prev_bytes_remaining = bytes_remaining; - } - - do { - rdma_idx = recvreq->req_rdma_idx; - bml_btl = recvreq->req_rdma[rdma_idx].bml_btl; - reg = recvreq->req_rdma[rdma_idx].btl_reg; - size = recvreq->req_rdma[rdma_idx].length; - if(++recvreq->req_rdma_idx >= recvreq->req_rdma_cnt) - recvreq->req_rdma_idx = 0; - } while(!size); - btl = bml_btl->btl; - - /* makes sure that we don't exceed BTL max rdma size - * if memory is not pinned already */ - if( (NULL == reg) && (btl->btl_rdma_pipeline_frag_size != 0) && - (size > btl->btl_rdma_pipeline_frag_size)) { - size = btl->btl_rdma_pipeline_frag_size; - } - - /* take lock to protect converter against concurrent access - * from unpack */ - OPAL_THREAD_LOCK(&recvreq->lock); - opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor, - &recvreq->req_rdma_offset ); - - /* prepare a descriptor for RDMA */ - mca_bml_base_prepare_dst(bml_btl, reg, - &recvreq->req_recv.req_base.req_convertor, - MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_FLAGS_PUT, &dst); - OPAL_THREAD_UNLOCK(&recvreq->lock); - - if(OPAL_UNLIKELY(dst == NULL)) { - continue; - } - - dst->des_cbfunc = mca_pml_csum_put_completion; - dst->des_cbdata = recvreq; - - seg_size = btl->btl_seg_size * dst->des_dst_cnt; - - /* prepare a descriptor for rdma control message */ - mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_csum_rdma_hdr_t) + seg_size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK); - - if( OPAL_UNLIKELY(NULL == ctl) ) { - mca_bml_base_free(bml_btl,dst); - continue; - } - ctl->des_cbfunc = mca_pml_csum_recv_ctl_completion; - - /* fill in rdma header */ - hdr = (mca_pml_csum_rdma_hdr_t*)ctl->des_src->seg_addr.pval; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_PUT; - hdr->hdr_common.hdr_flags = - (!recvreq->req_ack_sent) ? 
MCA_PML_CSUM_HDR_TYPE_ACK : 0; - hdr->hdr_common.hdr_csum = 0; - hdr->hdr_req = recvreq->remote_req_send; - hdr->hdr_des.pval = dst; - hdr->hdr_rdma_offset = recvreq->req_rdma_offset; - hdr->hdr_seg_cnt = dst->des_dst_cnt; - - /* copy segments */ - memmove (hdr + 1, dst->des_dst, seg_size); - - if(!recvreq->req_ack_sent) - recvreq->req_ack_sent = true; - - hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_rdma_hdr_t)); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Sending \'PUT\' with header csum:0x%04x\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_common.hdr_csum)); - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_PUT, recvreq->req_recv.req_base.req_proc); - - PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE, - &(recvreq->req_recv.req_base), size, - PERUSE_RECV); - - /* send rdma request to peer */ - rc = mca_bml_base_send(bml_btl, ctl, MCA_PML_CSUM_HDR_TYPE_PUT); - if( OPAL_LIKELY( rc >= 0 ) ) { - /* update request state */ - recvreq->req_rdma_offset += size; - OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1); - recvreq->req_rdma[rdma_idx].length -= size; - bytes_remaining -= size; - } else { - mca_bml_base_free(bml_btl,ctl); - mca_bml_base_free(bml_btl,dst); - } - } - - return OMPI_SUCCESS; -} - -#define IS_PROB_REQ(R) \ - ((MCA_PML_REQUEST_IPROBE == (R)->req_recv.req_base.req_type) || \ - (MCA_PML_REQUEST_PROBE == (R)->req_recv.req_base.req_type)) - -static inline void append_recv_req_to_queue(opal_list_t *queue, - mca_pml_csum_recv_request_t *req) -{ - if(OPAL_UNLIKELY(req->req_recv.req_base.req_type == MCA_PML_REQUEST_IPROBE)) - return; - - opal_list_append(queue, (opal_list_item_t*)req); - - /** - * We don't want to generate this kind of event for MPI_Probe. Hopefully, - * the compiler will optimize out the empty if loop in the case where PERUSE - * support is not required by the user. - */ - if(req->req_recv.req_base.req_type != MCA_PML_REQUEST_PROBE) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_INSERT_IN_POSTED_Q, - &(req->req_recv.req_base), PERUSE_RECV); - } -} - -/* - * this routine tries to match a posted receive. If a match is found, - * it places the request in the appropriate matched receive list. This - * function has to be called with the communicator matching lock held. 
-*/ -static mca_pml_csum_recv_frag_t* -recv_req_match_specific_proc( const mca_pml_csum_recv_request_t *req, - mca_pml_csum_comm_proc_t *proc ) -{ - opal_list_t* unexpected_frags = &proc->unexpected_frags; - opal_list_item_t *i; - mca_pml_csum_recv_frag_t* frag; - int tag = req->req_recv.req_base.req_tag; - - if(opal_list_get_size(unexpected_frags) == 0) - return NULL; - - if( OMPI_ANY_TAG == tag ) { - for (i = opal_list_get_first(unexpected_frags); - i != opal_list_get_end(unexpected_frags); - i = opal_list_get_next(i)) { - frag = (mca_pml_csum_recv_frag_t*)i; - - if( frag->hdr.hdr_match.hdr_tag >= 0 ) - return frag; - } - } else { - for (i = opal_list_get_first(unexpected_frags); - i != opal_list_get_end(unexpected_frags); - i = opal_list_get_next(i)) { - frag = (mca_pml_csum_recv_frag_t*)i; - - if( frag->hdr.hdr_match.hdr_tag == tag ) - return frag; - } - } - return NULL; -} - -/* - * this routine is used to try and match a wild posted receive - where - * wild is determined by the value assigned to the source process -*/ -static mca_pml_csum_recv_frag_t* -recv_req_match_wild( mca_pml_csum_recv_request_t* req, - mca_pml_csum_comm_proc_t **p) -{ - mca_pml_csum_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_csum_comm_proc_t* proc = comm->procs; - size_t proc_count = comm->num_procs, i; - - /* - * Loop over all the outstanding messages to find one that matches. - * There is an outer loop over lists of messages from each - * process, then an inner loop over the messages from the - * process. - */ - for (i = 0; i < proc_count; i++) { - mca_pml_csum_recv_frag_t* frag; - - /* loop over messages from the current proc */ - if((frag = recv_req_match_specific_proc(req, &proc[i]))) { - *p = &proc[i]; - req->req_recv.req_base.req_proc = proc[i].ompi_proc; - prepare_recv_req_converter(req); - return frag; /* match found */ - } - } - - *p = NULL; - return NULL; -} - - -void mca_pml_csum_recv_req_start(mca_pml_csum_recv_request_t *req) -{ - mca_pml_csum_comm_t* comm = req->req_recv.req_base.req_comm->c_pml_comm; - mca_pml_csum_comm_proc_t* proc; - mca_pml_csum_recv_frag_t* frag; - opal_list_t *queue; - mca_pml_csum_hdr_t* hdr; - - /* init/re-init the request */ - req->req_lock = 0; - req->req_pipeline_depth = 0; - req->req_bytes_received = 0; - req->req_bytes_expected = 0; - /* What about req_rdma_cnt ? */ - req->req_rdma_idx = 0; - req->req_pending = false; - req->req_ack_sent = false; - - MCA_PML_BASE_RECV_START(&req->req_recv.req_base); - - OPAL_THREAD_LOCK(&comm->matching_lock); - /** - * The laps of time between the ACTIVATE event and the SEARCH_UNEX one include - * the cost of the request lock. - */ - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_BEGIN, - &(req->req_recv.req_base), PERUSE_RECV); - - /* assign sequence number */ - req->req_recv.req_base.req_sequence = comm->recv_sequence++; - - /* attempt to match posted recv */ - if(req->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) { - frag = recv_req_match_wild(req, &proc); - queue = &comm->wild_receives; - } else { - proc = &comm->procs[req->req_recv.req_base.req_peer]; - req->req_recv.req_base.req_proc = proc->ompi_proc; - frag = recv_req_match_specific_proc(req, proc); - queue = &proc->specific_receives; - /* wild cardrecv will be prepared on match */ - prepare_recv_req_converter(req); - } - - if(OPAL_UNLIKELY(NULL == frag)) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END, - &(req->req_recv.req_base), PERUSE_RECV); - /* We didn't find any matches. 
Record this irecv so we can match - it when the message comes in. */ - append_recv_req_to_queue(queue, req); - req->req_match_received = false; - OPAL_THREAD_UNLOCK(&comm->matching_lock); - } else { - if(OPAL_LIKELY(!IS_PROB_REQ(req))) { - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_MATCH_UNEX, - &(req->req_recv.req_base), PERUSE_RECV); - - hdr = (mca_pml_csum_hdr_t*)frag->segments->seg_addr.pval; - PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_REMOVE_FROM_UNEX_Q, - req->req_recv.req_base.req_comm, - hdr->hdr_match.hdr_src, - hdr->hdr_match.hdr_tag, - PERUSE_RECV); - - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_SEARCH_UNEX_Q_END, - &(req->req_recv.req_base), PERUSE_RECV); - - opal_list_remove_item(&proc->unexpected_frags, - (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&comm->matching_lock); - - switch(hdr->hdr_common.hdr_type) { - case MCA_PML_CSUM_HDR_TYPE_MATCH: - mca_pml_csum_recv_request_progress_match(req, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_CSUM_HDR_TYPE_RNDV: - mca_pml_csum_recv_request_progress_rndv(req, frag->btl, frag->segments, - frag->num_segments); - break; - case MCA_PML_CSUM_HDR_TYPE_RGET: - mca_pml_csum_recv_request_progress_rget(req, frag->btl, frag->segments, - frag->num_segments); - break; - default: - assert(0); - } - - MCA_PML_CSUM_RECV_FRAG_RETURN(frag); - - } else { - OPAL_THREAD_UNLOCK(&comm->matching_lock); - mca_pml_csum_recv_request_matched_probe(req, frag->btl, - frag->segments, frag->num_segments); - } - } -} diff --git a/ompi/mca/pml/csum/pml_csum_recvreq.h b/ompi/mca/pml/csum/pml_csum_recvreq.h deleted file mode 100644 index 3245b82c2e..0000000000 --- a/ompi/mca/pml/csum/pml_csum_recvreq.h +++ /dev/null @@ -1,425 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - */ -#ifndef OMPI_PML_CSUM_RECV_REQUEST_H -#define OMPI_PML_CSUM_RECV_REQUEST_H - -#include "pml_csum.h" -#include "pml_csum_rdma.h" -#include "pml_csum_rdmafrag.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/pml/csum/pml_csum_comm.h" -#include "ompi/mca/mpool/base/base.h" -#include "ompi/mca/pml/base/pml_base_recvreq.h" - -BEGIN_C_DECLS - -struct mca_pml_csum_recv_request_t { - mca_pml_base_recv_request_t req_recv; - ompi_ptr_t remote_req_send; - int32_t req_lock; - size_t req_pipeline_depth; - size_t req_bytes_received; /**< amount of data transferred into the user buffer */ - size_t req_bytes_expected; /**< local size of the data as suggested by the user */ - size_t req_rdma_offset; - size_t req_send_offset; - uint32_t req_rdma_cnt; - uint32_t req_rdma_idx; - bool req_pending; - bool req_ack_sent; /**< whether ack was sent to the sender */ - bool req_match_received; /**< Prevent request to be completed prematurely */ - opal_mutex_t lock; - mca_pml_csum_com_btl_t req_rdma[1]; -}; -typedef struct mca_pml_csum_recv_request_t mca_pml_csum_recv_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_csum_recv_request_t); - -static inline bool lock_recv_request(mca_pml_csum_recv_request_t *recvreq) -{ - return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1; -} - -static inline bool unlock_recv_request(mca_pml_csum_recv_request_t *recvreq) -{ - return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0; -} - -/** - * Allocate a recv request from the modules free list. - * - * @param rc (OUT) OMPI_SUCCESS or error status on failure. - * @return Receive request. - */ -#define MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc) \ -do { \ - ompi_free_list_item_t* item; \ - rc = OMPI_SUCCESS; \ - OMPI_FREE_LIST_GET(&mca_pml_base_recv_requests, item, rc); \ - recvreq = (mca_pml_csum_recv_request_t*)item; \ -} while(0) - - -/** - * Initialize a receive request with call parameters. - * - * @param request (IN) Receive request. - * @param addr (IN) User buffer. - * @param count (IN) Number of elements of indicated datatype. - * @param datatype (IN) User defined datatype. - * @param src (IN) Source rank w/in the communicator. - * @param tag (IN) User defined tag. - * @param comm (IN) Communicator. - * @param persistent (IN) Is this a ersistent request. - */ -#define MCA_PML_CSUM_RECV_REQUEST_INIT( request, \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent) \ -do { \ - MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv, \ - addr, \ - count, \ - datatype, \ - src, \ - tag, \ - comm, \ - persistent); \ -} while(0) - -/** - * Mark the request as completed at MPI level for internal purposes. - * - * @param recvreq (IN) Receive request. - */ -#define MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE( recvreq ) \ - do { \ - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ - &(recvreq->req_recv.req_base), PERUSE_RECV ); \ - ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \ - } while (0) - -/* - * Free the PML receive request - */ -#define MCA_PML_CSUM_RECV_REQUEST_RETURN(recvreq) \ - { \ - MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \ - OMPI_FREE_LIST_RETURN( &mca_pml_base_recv_requests, \ - (ompi_free_list_item_t*)(recvreq)); \ - } - -/** - * Complete receive request. Request structure cannot be accessed after calling - * this function any more. - * - * @param recvreq (IN) Receive request. 
- */ -static inline void -recv_request_pml_complete(mca_pml_csum_recv_request_t *recvreq) -{ - size_t i; - - assert(false == recvreq->req_recv.req_base.req_pml_complete); - - if(recvreq->req_recv.req_bytes_packed > 0) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, - &recvreq->req_recv.req_base, PERUSE_RECV ); - } - - for(i = 0; i < recvreq->req_rdma_cnt; i++) { - mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[i].btl_reg; - if( NULL != btl_reg && btl_reg->mpool != NULL) { - btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg ); - } - } - recvreq->req_rdma_cnt = 0; - - OPAL_THREAD_LOCK(&ompi_request_lock); - if(true == recvreq->req_recv.req_base.req_free_called) { - MCA_PML_CSUM_RECV_REQUEST_RETURN(recvreq); - } else { - /* initialize request status */ - recvreq->req_recv.req_base.req_pml_complete = true; - recvreq->req_recv.req_base.req_ompi.req_status._ucount = - recvreq->req_bytes_received; - if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) { - recvreq->req_recv.req_base.req_ompi.req_status._ucount = - recvreq->req_recv.req_bytes_packed; - recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR = - MPI_ERR_TRUNCATE; - } - MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE(recvreq); - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); -} - -static inline bool -recv_request_pml_complete_check(mca_pml_csum_recv_request_t *recvreq) -{ -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_rmb(); -#endif - if(recvreq->req_match_received && - recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed && - lock_recv_request(recvreq)) { - recv_request_pml_complete(recvreq); - return true; - } - - return false; -} - -extern void mca_pml_csum_recv_req_start(mca_pml_csum_recv_request_t *req); -#define MCA_PML_CSUM_RECV_REQUEST_START(r) mca_pml_csum_recv_req_start(r) - -static inline void prepare_recv_req_converter(mca_pml_csum_recv_request_t *req) -{ - if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) { - opal_convertor_copy_and_prepare_for_recv( - req->req_recv.req_base.req_proc->proc_convertor, - &(req->req_recv.req_base.req_datatype->super), - req->req_recv.req_base.req_count, - req->req_recv.req_base.req_addr, - 0, - &req->req_recv.req_base.req_convertor); - opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor, - &req->req_bytes_expected); - } -} - -#define MCA_PML_CSUM_RECV_REQUEST_MATCHED(request, hdr) \ - recv_req_matched(request, hdr) - -static inline void recv_req_matched(mca_pml_csum_recv_request_t *req, - mca_pml_csum_match_hdr_t *hdr) -{ - req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src; - req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag; - req->req_match_received = true; -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_wmb(); -#endif - if(req->req_recv.req_bytes_packed > 0) { - if(MPI_ANY_SOURCE == req->req_recv.req_base.req_peer) { - /* non wildcard prepared during post recv */ - prepare_recv_req_converter(req); - } - PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN, - &req->req_recv.req_base, PERUSE_RECV); - } -} - - -/** - * - */ - -#define MCA_PML_CSUM_RECV_REQUEST_UNPACK( request, \ - segments, \ - num_segments, \ - seg_offset, \ - data_offset, \ - bytes_received, \ - bytes_delivered) \ -do { \ - bytes_delivered = 0; \ - if(request->req_recv.req_bytes_packed > 0) { \ - struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; \ - uint32_t iov_count = 0; \ - size_t max_data = bytes_received; \ - size_t n, offset = seg_offset; \ - mca_btl_base_segment_t* segment = segments; \ - \ 
- OPAL_THREAD_LOCK(&request->lock); \ - for( n = 0; n < num_segments; n++, segment++ ) { \ - if(offset >= segment->seg_len) { \ - offset -= segment->seg_len; \ - } else { \ - iov[iov_count].iov_len = segment->seg_len - offset; \ - iov[iov_count].iov_base = (IOVBASE_TYPE*) \ - ((unsigned char*)segment->seg_addr.pval + offset); \ - iov_count++; \ - offset = 0; \ - } \ - } \ - PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE, \ - &(recvreq->req_recv.req_base), max_data, \ - PERUSE_RECV); \ - opal_convertor_set_position( &(request->req_recv.req_base.req_convertor), \ - &data_offset ); \ - opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor, \ - iov, \ - &iov_count, \ - &max_data ); \ - bytes_delivered = max_data; \ - OPAL_THREAD_UNLOCK(&request->lock); \ - } \ -} while (0) - - -/** - * - */ - -void mca_pml_csum_recv_request_progress_match( - mca_pml_csum_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_csum_recv_request_progress_frag( - mca_pml_csum_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_csum_recv_request_progress_rndv( - mca_pml_csum_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_csum_recv_request_progress_rget( - mca_pml_csum_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -void mca_pml_csum_recv_request_matched_probe( - mca_pml_csum_recv_request_t* req, - struct mca_btl_base_module_t* btl, - mca_btl_base_segment_t* segments, - size_t num_segments); - -/** - * - */ - -int mca_pml_csum_recv_request_schedule_once( - mca_pml_csum_recv_request_t* req, mca_bml_base_btl_t* start_bml_btl); - -static inline int mca_pml_csum_recv_request_schedule_exclusive( - mca_pml_csum_recv_request_t* req, - mca_bml_base_btl_t* start_bml_btl) -{ - int rc; - - do { - rc = mca_pml_csum_recv_request_schedule_once(req, start_bml_btl); - if(rc == OMPI_ERR_OUT_OF_RESOURCE) - break; - } while(!unlock_recv_request(req)); - - if(OMPI_SUCCESS == rc) - recv_request_pml_complete_check(req); - - return rc; -} - -static inline void mca_pml_csum_recv_request_schedule( - mca_pml_csum_recv_request_t* req, - mca_bml_base_btl_t* start_bml_btl) -{ - if(!lock_recv_request(req)) - return; - - (void)mca_pml_csum_recv_request_schedule_exclusive(req, start_bml_btl); -} - -#define MCA_PML_CSUM_ADD_ACK_TO_PENDING(P, S, D, O) \ - do { \ - mca_pml_csum_pckt_pending_t *_pckt; \ - int _rc; \ - \ - MCA_PML_CSUM_PCKT_PENDING_ALLOC(_pckt,_rc); \ - _pckt->hdr.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_ACK; \ - _pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \ - _pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \ - _pckt->hdr.hdr_ack.hdr_send_offset = (O); \ - _pckt->proc = (P); \ - _pckt->bml_btl = NULL; \ - OPAL_THREAD_LOCK(&mca_pml_csum.lock); \ - opal_list_append(&mca_pml_csum.pckt_pending, \ - (opal_list_item_t*)_pckt); \ - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); \ - } while(0) - -int mca_pml_csum_recv_request_ack_send_btl(ompi_proc_t* proc, - mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req, - uint64_t hdr_rdma_offset, bool nordma); - -static inline int mca_pml_csum_recv_request_ack_send(ompi_proc_t* proc, - uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset, - bool nordma) -{ - size_t i; - 
mca_bml_base_btl_t* bml_btl; - mca_bml_base_endpoint_t* endpoint = - (mca_bml_base_endpoint_t*)proc->proc_bml; - - for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - if(mca_pml_csum_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req, - hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS) - return OMPI_SUCCESS; - } - - MCA_PML_CSUM_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req, - hdr_send_offset); - - return OMPI_ERR_OUT_OF_RESOURCE; -} - -int mca_pml_csum_recv_request_get_frag(mca_pml_csum_rdma_frag_t* frag); - -/* This function tries to continue recvreq that stuck due to resource - * unavailability. Recvreq is added to recv_pending list if scheduling of put - * operation cannot be accomplished for some reason. */ -void mca_pml_csum_recv_request_process_pending(void); - -END_C_DECLS - -#endif - diff --git a/ompi/mca/pml/csum/pml_csum_sendreq.c b/ompi/mca/pml/csum/pml_csum_sendreq.c deleted file mode 100644 index a3ba985f34..0000000000 --- a/ompi/mca/pml/csum/pml_csum_sendreq.c +++ /dev/null @@ -1,1347 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2008 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "opal/util/crc.h" -#include "opal/prefetch.h" -#include "opal/util/output.h" - -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/btl/btl.h" -#include "ompi/mca/mpool/mpool.h" -#include "ompi/mca/bml/base/base.h" -#include "ompi/memchecker.h" -#include "ompi/mca/pml/base/base.h" - -#include "pml_csum.h" -#include "pml_csum_hdr.h" -#include "pml_csum_sendreq.h" -#include "pml_csum_rdmafrag.h" -#include "pml_csum_recvreq.h" - -OBJ_CLASS_INSTANCE(mca_pml_csum_send_range_t, ompi_free_list_item_t, - NULL, NULL); - -void mca_pml_csum_send_request_process_pending(mca_bml_base_btl_t *bml_btl) -{ - int rc, i, s = opal_list_get_size(&mca_pml_csum.send_pending); - - /* advance pending requests */ - for(i = 0; i < s; i++) { - mca_pml_csum_send_pending_t pending_type = MCA_PML_CSUM_SEND_PENDING_NONE; - mca_pml_csum_send_request_t* sendreq; - mca_bml_base_btl_t *send_dst; - - sendreq = get_request_from_send_pending(&pending_type); - if(OPAL_UNLIKELY(NULL == sendreq)) - break; - - switch(pending_type) { - case MCA_PML_CSUM_SEND_PENDING_SCHEDULE: - rc = mca_pml_csum_send_request_schedule_exclusive(sendreq); - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - return; - } - break; - case MCA_PML_CSUM_SEND_PENDING_START: - send_dst = mca_bml_base_btl_array_find( - &sendreq->req_endpoint->btl_eager, bml_btl->btl); - if (NULL == send_dst) { - /* Put request back onto pending list and try next one. 
*/ - add_request_to_send_pending(sendreq, - MCA_PML_CSUM_SEND_PENDING_START, true); - } else { - rc = mca_pml_csum_send_request_start_btl(sendreq, send_dst); - if (OMPI_ERR_OUT_OF_RESOURCE == rc) { - /* No more resources on this btl so prepend to the pending - * list to minimize reordering and give up for now. */ - add_request_to_send_pending(sendreq, - MCA_PML_CSUM_SEND_PENDING_START, false); - return; - } - } - break; - default: - opal_output(0, "[%s:%d] wrong send request type\n", - __FILE__, __LINE__); - break; - } - } -} - -/* - * The free call mark the final stage in a request life-cycle. Starting from this - * point the request is completed at both PML and user level, and can be used - * for others p2p communications. Therefore, in the case of the CSUM PML it should - * be added to the free request list. - */ -static int mca_pml_csum_send_request_free(struct ompi_request_t** request) -{ - mca_pml_csum_send_request_t* sendreq = *(mca_pml_csum_send_request_t**)request; - - assert( false == sendreq->req_send.req_base.req_free_called ); - - OPAL_THREAD_LOCK(&ompi_request_lock); - sendreq->req_send.req_base.req_free_called = true; - - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_NOTIFY, - &(sendreq->req_send.req_base), PERUSE_SEND ); - - if( true == sendreq->req_send.req_base.req_pml_complete ) { - /* make buffer defined when the request is compeleted, - and before releasing the objects. */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - - MCA_PML_CSUM_SEND_REQUEST_RETURN( sendreq ); - } - - OPAL_THREAD_UNLOCK(&ompi_request_lock); - - *request = MPI_REQUEST_NULL; - return OMPI_SUCCESS; -} - -static int mca_pml_csum_send_request_cancel(struct ompi_request_t* request, int complete) -{ - /* we dont cancel send requests by now */ - return OMPI_SUCCESS; -} - -static void mca_pml_csum_send_request_construct(mca_pml_csum_send_request_t* req) -{ - req->req_send.req_base.req_type = MCA_PML_REQUEST_SEND; - req->req_send.req_base.req_ompi.req_free = mca_pml_csum_send_request_free; - req->req_send.req_base.req_ompi.req_cancel = mca_pml_csum_send_request_cancel; - req->req_rdma_cnt = 0; - req->req_throttle_sends = false; - OBJ_CONSTRUCT(&req->req_send_ranges, opal_list_t); - OBJ_CONSTRUCT(&req->req_send_range_lock, opal_mutex_t); -} - -static void mca_pml_csum_send_request_destruct(mca_pml_csum_send_request_t* req) -{ - OBJ_DESTRUCT(&req->req_send_ranges); - OBJ_DESTRUCT(&req->req_send_range_lock); -} - -OBJ_CLASS_INSTANCE( mca_pml_csum_send_request_t, - mca_pml_base_send_request_t, - mca_pml_csum_send_request_construct, - mca_pml_csum_send_request_destruct ); - -/** - * Completion of a short message - nothing left to schedule. 
- */ - -static inline void -mca_pml_csum_match_completion_free_request( mca_bml_base_btl_t* bml_btl, - mca_pml_csum_send_request_t* sendreq ) -{ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - /* signal request completion */ - send_request_pml_complete(sendreq); - - /* check for pending requests */ - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - -static void -mca_pml_csum_match_completion_free( struct mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - mca_pml_csum_match_completion_free_request( bml_btl, sendreq ); -} - -static inline void -mca_pml_csum_rndv_completion_request( mca_bml_base_btl_t* bml_btl, - mca_pml_csum_send_request_t* sendreq, - size_t req_bytes_delivered ) -{ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - - /* advance the request */ - OPAL_THREAD_ADD32(&sendreq->req_state, -1); - - send_request_pml_complete_check(sendreq); - - /* check for pending requests */ - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - -/* - * Completion of the first fragment of a long message that - * requires an acknowledgement - */ -static void -mca_pml_csum_rndv_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - size_t req_bytes_delivered; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* count bytes of user data actually delivered. As the rndv completion only - * happens in one thread, the increase of the req_bytes_delivered does not - * have to be atomic. - */ - req_bytes_delivered = mca_pml_csum_compute_segment_length (btl->btl_seg_size, - (void *) des->des_src, - des->des_src_cnt, - sizeof(mca_pml_csum_rendezvous_hdr_t)); - - mca_pml_csum_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered ); -} - - -/** - * Completion of a get request. 
- */ - -static void -mca_pml_csum_rget_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context; - size_t req_bytes_delivered = 0; - - /* count bytes of user data actually delivered and check for request completion */ - if (OPAL_LIKELY(OMPI_SUCCESS == status)) { - req_bytes_delivered = mca_pml_csum_compute_segment_length (btl->btl_seg_size, - (void *) des->des_src, - des->des_src_cnt, 0); - } - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - - send_request_pml_complete_check(sendreq); - /* free the descriptor */ - mca_bml_base_free(bml_btl, des); - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - - -/** - * Completion of a control message - return resources. - */ - -static void -mca_pml_csum_send_ctl_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check for pending requests */ - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - -/** - * Completion of additional fragments of a large message - may need - * to schedule additional fragments. - */ - -static void -mca_pml_csum_frag_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)des->des_cbdata; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - size_t req_bytes_delivered; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { - /* TSW - FIX */ - opal_output(0, "%s:%d FATAL", __FILE__, __LINE__); - ompi_rte_abort(-1, NULL); - } - - /* count bytes of user data actually delivered */ - req_bytes_delivered = mca_pml_csum_compute_segment_length (btl->btl_seg_size, - (void *) des->des_src, - des->des_src_cnt, - sizeof(mca_pml_csum_frag_hdr_t)); - - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1); - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered); - - if(send_request_pml_complete_check(sendreq) == false) { - mca_pml_csum_send_request_schedule(sendreq); - } - - /* check for pending requests */ - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - -/** - * Buffer the entire message and mark as complete. 
- */ - -int mca_pml_csum_send_request_start_buffered( - mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_csum_hdr_t* hdr; - struct iovec iov; - unsigned int iov_count; - size_t max_data, req_bytes_delivered; - int rc; - - /* allocate descriptor */ - mca_bml_base_alloc(bml_btl, &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_csum_rendezvous_hdr_t) + size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_src; - - /* pack the data into the BTL supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - sizeof(mca_pml_csum_rendezvous_hdr_t)); - iov.iov_len = size; - iov_count = 1; - max_data = size; - if((rc = opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, - &iov_count, - &max_data)) < 0) { - mca_bml_base_free(bml_btl, des); - return rc; - } - req_bytes_delivered = max_data; - - /* build rendezvous header */ - hdr = (mca_pml_csum_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_RNDV; - hdr->hdr_common.hdr_csum = 0; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; - hdr->hdr_match.hdr_csum = (size > 0 ? - sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); - hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t)); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s:%s:%d Sending \'match\' with data csum:0x%x, header csum:0x%x, size:%lu \n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, - hdr->hdr_match.hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)max_data)); - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* update lengths */ - segment->seg_len = sizeof(mca_pml_csum_rendezvous_hdr_t) + max_data; - - des->des_cbfunc = mca_pml_csum_rndv_completion; - des->des_cbdata = sendreq; - - /* buffer the remainder of the message */ - rc = mca_pml_base_bsend_request_alloc((ompi_request_t*)sendreq); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_bml_base_free(bml_btl, des); - return rc; - } - - iov.iov_base = (IOVBASE_TYPE*)(((unsigned char*)sendreq->req_send.req_addr) + max_data); - iov.iov_len = max_data = sendreq->req_send.req_bytes_packed - max_data; - - if((rc = opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, - &iov_count, - &max_data)) < 0) { - mca_bml_base_free(bml_btl, des); - return rc; - } - - /* re-init convertor for packed data */ - opal_convertor_prepare_for_send( &sendreq->req_send.req_base.req_convertor, - &(ompi_mpi_byte.dt.super), - sendreq->req_send.req_bytes_packed, - sendreq->req_send.req_addr ); - - /* wait for ack and completion */ - sendreq->req_state = 2; - - /* request is complete at mpi level */ - OPAL_THREAD_LOCK(&ompi_request_lock); - MCA_PML_CSUM_SEND_REQUEST_MPI_COMPLETE(sendreq, true); - OPAL_THREAD_UNLOCK(&ompi_request_lock); - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_CSUM_HDR_TYPE_RNDV); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( 
OPAL_LIKELY( 1 == rc ) ) { - mca_pml_csum_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - - -/** - * We work on a buffered request with a size smaller than the eager size - * or the BTL is not able to send the data IN_PLACE. Request a segment - * that is used for initial hdr and any eager data. This is used only - * from the _START macro. - */ -int mca_pml_csum_send_request_start_copy( mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - mca_btl_base_descriptor_t* des = NULL; - mca_btl_base_segment_t* segment; - mca_pml_csum_hdr_t* hdr; - struct iovec iov; - unsigned int iov_count; - size_t max_data = size; - int rc; - - if(NULL != bml_btl->btl->btl_sendi && 0) { - mca_pml_csum_match_hdr_t match; - match.hdr_common.hdr_flags = 0; - match.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_MATCH; - match.hdr_common.hdr_csum = 0; - match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - match.hdr_tag = sendreq->req_send.req_base.req_tag; - match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - match.hdr_csum = (size > 0 ? - sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); - match.hdr_common.hdr_csum = opal_csum16(&match, OMPI_PML_CSUM_MATCH_HDR_LEN); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s:%s:%d Sending \'match\' with data csum:0x%x, header csum:0x%x, size:%lu \n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, - match.hdr_csum, match.hdr_common.hdr_csum, (unsigned long)max_data)); - - csum_hdr_hton(&match, MCA_PML_CSUM_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* try to send immediately */ - rc = mca_bml_base_sendi( bml_btl, &sendreq->req_send.req_base.req_convertor, - &match, OMPI_PML_CSUM_MATCH_HDR_LEN, - size, MCA_BTL_NO_ORDER, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - MCA_PML_CSUM_HDR_TYPE_MATCH, - &des); - if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) { - /* signal request completion */ - send_request_pml_complete(sendreq); - - /* check for pending requests */ - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); - return OMPI_SUCCESS; - } - } else { - /* allocate descriptor */ - mca_bml_base_alloc( bml_btl, &des, - MCA_BTL_NO_ORDER, - OMPI_PML_CSUM_MATCH_HDR_LEN + size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - } - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - segment = des->des_src; - - if(size > 0) { - /* pack the data into the supplied buffer */ - iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval + - OMPI_PML_CSUM_MATCH_HDR_LEN); - iov.iov_len = size; - iov_count = 1; - /* - * Before copy the user buffer, make the target part - * accessible. - */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - (void)opal_convertor_pack( &sendreq->req_send.req_base.req_convertor, - &iov, &iov_count, &max_data ); - /* - * Packing finished, make the user buffer unaccessable. 
- */ - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - } - - - /* build match header */ - hdr = (mca_pml_csum_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_MATCH; - hdr->hdr_common.hdr_csum = 0; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_match.hdr_csum = (size > 0 ? - sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); - hdr->hdr_common.hdr_csum = opal_csum16(hdr, OMPI_PML_CSUM_MATCH_HDR_LEN); - - OPAL_OUTPUT_VERBOSE((5, mca_pml_base_output, - "%s:%s:%d common_hdr: %02x:%02x:%04x match_hdr: %04x:%04x:%08x:%08x:%08x", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, - hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, hdr->hdr_common.hdr_csum, - hdr->hdr_match.hdr_ctx, hdr->hdr_match.hdr_seq, hdr->hdr_match.hdr_src, hdr->hdr_match.hdr_tag, hdr->hdr_match.hdr_csum)); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s:%s:%d Sending \'match\' with data csum:0x%x, header csum:0x%x, size:%lu \n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, - hdr->hdr_match.hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)max_data)); - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* update lengths */ - segment->seg_len = OMPI_PML_CSUM_MATCH_HDR_LEN + max_data; - - /* short message */ - des->des_cbdata = sendreq; - des->des_cbfunc = mca_pml_csum_match_completion_free; - - /* send */ - rc = mca_bml_base_send_status(bml_btl, des, MCA_PML_CSUM_HDR_TYPE_MATCH); - if( OPAL_LIKELY( rc >= OMPI_SUCCESS ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_csum_match_completion_free_request( bml_btl, sendreq ); - } - return OMPI_SUCCESS; - } - switch(rc) { - case OMPI_ERR_RESOURCE_BUSY: - /* No more resources. Allow the upper level to queue the send */ - rc = OMPI_ERR_OUT_OF_RESOURCE; - break; - default: - mca_bml_base_free(bml_btl, des); - break; - } - return rc; -} - -/** - * BTL can send directly from user buffer so allow the BTL - * to prepare the segment list. Start sending a small message. 
- */ - -int mca_pml_csum_send_request_start_prepare( mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_csum_hdr_t* hdr; - int rc; - - /* prepare descriptor */ - mca_bml_base_prepare_src( bml_btl, - NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - OMPI_PML_CSUM_MATCH_HDR_LEN, - &size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - &des ); - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_src; - - /* build match header */ - hdr = (mca_pml_csum_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_MATCH; - hdr->hdr_common.hdr_csum = 0; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_match.hdr_csum = (size > 0 ? - sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); - hdr->hdr_common.hdr_csum = opal_csum16(hdr, OMPI_PML_CSUM_MATCH_HDR_LEN); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s:%s:%d Sending \'match\' with data csum:0x%x, header csum:0x%x, size:%lu \n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, - hdr->hdr_match.hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)size)); - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_MATCH, - sendreq->req_send.req_base.req_proc); - - /* short message */ - des->des_cbfunc = mca_pml_csum_match_completion_free; - des->des_cbdata = sendreq; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_CSUM_HDR_TYPE_MATCH); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_csum_match_completion_free_request( bml_btl, sendreq ); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - - -/** - * We have contigous data that is registered - schedule across - * available nics. - */ - -int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size ) -{ - /* - * When req_rdma array is constructed the first element of the array always - * assigned different btl in round robin fashion (if there are more than - * one RDMA capable BTLs). This way round robin distribution of RDMA - * operation is achieved. 
- */ - - mca_btl_base_descriptor_t* des, *src = NULL; - mca_btl_base_segment_t* segment; - mca_pml_csum_hdr_t* hdr; - bool need_local_cb = false; - int rc; - - bml_btl = sendreq->req_rdma[0].bml_btl; - if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & MCA_BTL_FLAGS_GET)) { - mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg; - mca_btl_base_descriptor_t* src; - size_t seg_size; - size_t old_position = sendreq->req_send.req_base.req_convertor.bConverted; - - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - /* prepare source descriptor/segment(s) */ - /* PML owns this descriptor and will free it in */ - /* get_completion */ - mca_bml_base_prepare_src( bml_btl, - reg, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - 0, - &size, - MCA_BTL_DES_FLAGS_GET, - &src ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - if( OPAL_UNLIKELY(NULL == src) ) { - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &old_position); - return OMPI_ERR_OUT_OF_RESOURCE; - } - src->des_cbfunc = mca_pml_csum_rget_completion; - src->des_cbdata = sendreq; - - seg_size = bml_btl->btl->btl_seg_size * src->des_src_cnt; - - /* allocate space for get hdr + segment list */ - mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, - sizeof(mca_pml_csum_rget_hdr_t) + seg_size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des) ) { - opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor, - &old_position ); - mca_bml_base_free(bml_btl, src); - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_src; - - /* build match header */ - hdr = (mca_pml_csum_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = MCA_PML_CSUM_HDR_FLAGS_CONTIG|MCA_PML_CSUM_HDR_FLAGS_PIN; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_RGET; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; - hdr->hdr_rget.hdr_des.pval = src; - hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt; - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_RGET, - sendreq->req_send.req_base.req_proc); - - /* copy segment data */ - memmove (&hdr->hdr_rget + 1, src->des_src, seg_size); - - des->des_cbfunc = mca_pml_csum_send_ctl_completion; - - /** - * Well, it's a get so we will not know when the peer get the data anyway. - * If we generate the PERUSE event here, at least we will know when do we - * sent the GET message ... 
- */ - if( sendreq->req_send.req_bytes_packed > 0 ) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_BEGIN, - &(sendreq->req_send.req_base), PERUSE_SEND ); - } - - } else { - - /* allocate a rendezvous header - dont eager send any data - * receiver will schedule rdma put(s) of the entire message - */ - - mca_bml_base_alloc(bml_btl, &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_csum_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); - if( OPAL_UNLIKELY(NULL == des)) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_src; - - /* build hdr */ - hdr = (mca_pml_csum_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = MCA_PML_CSUM_HDR_FLAGS_CONTIG|MCA_PML_CSUM_HDR_FLAGS_PIN; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_RNDV; - hdr->hdr_common.hdr_csum = OPAL_CSUM_ZERO; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; - hdr->hdr_match.hdr_csum = OPAL_CSUM_ZERO; - hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t)); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Sending \'rndv\'(initiate RDMA PUT) with data csum:0x%x, header csum:0x%04x, size:%lu\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_match.hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)0)); - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* update lengths with number of bytes actually packed */ - segment->seg_len = sizeof(mca_pml_csum_rendezvous_hdr_t); - - /* first fragment of a long message */ - des->des_cbfunc = mca_pml_csum_rndv_completion; - need_local_cb = true; - - /* wait for ack and completion */ - sendreq->req_state = 2; - } - - des->des_cbdata = sendreq; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, hdr->hdr_common.hdr_type); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) && (true == need_local_cb)) { - mca_pml_csum_rndv_completion_request( bml_btl, sendreq, 0 ); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des); - if (src) { - mca_bml_base_free (bml_btl, src); - } - - return rc; -} - - -/** - * Rendezvous is required. Not doing rdma so eager send up to - * the btls eager limit. 
- */ - -int mca_pml_csum_send_request_start_rndv( mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size, - int flags ) -{ - mca_btl_base_descriptor_t* des; - mca_btl_base_segment_t* segment; - mca_pml_csum_hdr_t* hdr; - int rc; - - /* prepare descriptor */ - if(size == 0) { - mca_bml_base_alloc( bml_btl, - &des, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_csum_rendezvous_hdr_t), - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ); - } else { - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - mca_bml_base_prepare_src( bml_btl, - NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_csum_rendezvous_hdr_t), - &size, - MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, - &des ); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - } - - if( OPAL_UNLIKELY(NULL == des) ) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - segment = des->des_src; - - /* build hdr */ - hdr = (mca_pml_csum_hdr_t*)segment->seg_addr.pval; - hdr->hdr_common.hdr_flags = flags; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_RNDV; - hdr->hdr_common.hdr_csum = 0; - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; - hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence; - hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; - hdr->hdr_rndv.hdr_src_req.pval = sendreq; - hdr->hdr_match.hdr_csum = (size > 0 ? 
- sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); - hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t)); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Sending \'rndv\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_match.hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)size)); - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_RNDV, - sendreq->req_send.req_base.req_proc); - - /* first fragment of a long message */ - des->des_cbdata = sendreq; - des->des_cbfunc = mca_pml_csum_rndv_completion; - - /* wait for ack and completion */ - sendreq->req_state = 2; - - /* send */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_CSUM_HDR_TYPE_RNDV); - if( OPAL_LIKELY( rc >= 0 ) ) { - if( OPAL_LIKELY( 1 == rc ) ) { - mca_pml_csum_rndv_completion_request( bml_btl, sendreq, size ); - } - return OMPI_SUCCESS; - } - mca_bml_base_free(bml_btl, des ); - return rc; -} - -void mca_pml_csum_send_request_copy_in_out( mca_pml_csum_send_request_t *sendreq, - uint64_t send_offset, - uint64_t send_length ) -{ - mca_pml_csum_send_range_t *sr; - ompi_free_list_item_t *i; - mca_bml_base_endpoint_t* bml_endpoint = sendreq->req_endpoint; - int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_send); - int rc, n; - double weight_total = 0; - - if( OPAL_UNLIKELY(0 == send_length) ) - return; - - OMPI_FREE_LIST_WAIT(&mca_pml_csum.send_ranges, i, rc); - - sr = (mca_pml_csum_send_range_t*)i; - - sr->range_send_offset = send_offset; - sr->range_send_length = send_length; - sr->range_btl_idx = 0; - - for(n = 0; n < num_btls && n < mca_pml_csum.max_send_per_range; n++) { - sr->range_btls[n].bml_btl = - mca_bml_base_btl_array_get_next(&bml_endpoint->btl_send); - weight_total += sr->range_btls[n].bml_btl->btl_weight; - } - - sr->range_btl_cnt = n; - mca_pml_csum_calc_weighted_length(sr->range_btls, n, send_length, - weight_total); - - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - opal_list_append(&sendreq->req_send_ranges, (opal_list_item_t*)sr); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); -} - -static inline mca_pml_csum_send_range_t * -get_send_range_nolock(mca_pml_csum_send_request_t* sendreq) -{ - opal_list_item_t *item; - - item = opal_list_get_first(&sendreq->req_send_ranges); - - if(opal_list_get_end(&sendreq->req_send_ranges) == item) - return NULL; - - return (mca_pml_csum_send_range_t*)item; -} - -static inline mca_pml_csum_send_range_t * -get_send_range(mca_pml_csum_send_request_t* sendreq) -{ - mca_pml_csum_send_range_t *range; - - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - range = get_send_range_nolock(sendreq); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); - - return range; -} - -static inline mca_pml_csum_send_range_t * -get_next_send_range(mca_pml_csum_send_request_t* sendreq, - mca_pml_csum_send_range_t *range) -{ - OPAL_THREAD_LOCK(&sendreq->req_send_range_lock); - opal_list_remove_item(&sendreq->req_send_ranges, (opal_list_item_t *)range); - OMPI_FREE_LIST_RETURN(&mca_pml_csum.send_ranges, &range->base); - range = get_send_range_nolock(sendreq); - OPAL_THREAD_UNLOCK(&sendreq->req_send_range_lock); - - return range; -} - -/** - * Schedule pipeline of send descriptors for the given request. - * Up to the rdma threshold. If this is a send based protocol, - * the rdma threshold is the end of the message. Otherwise, schedule - * fragments up to the threshold to overlap initial registration/setup - * costs of the rdma. Only one thread can be inside this function. 
- */ - -int -mca_pml_csum_send_request_schedule_once(mca_pml_csum_send_request_t* sendreq) -{ - size_t prev_bytes_remaining = 0; - mca_pml_csum_send_range_t *range; - int num_fail = 0; - - /* check pipeline_depth here before attempting to get any locks */ - if(true == sendreq->req_throttle_sends && - sendreq->req_pipeline_depth >= mca_pml_csum.send_pipeline_depth) - return OMPI_SUCCESS; - - range = get_send_range(sendreq); - - while(range && (false == sendreq->req_throttle_sends || - sendreq->req_pipeline_depth < mca_pml_csum.send_pipeline_depth)) { - mca_pml_csum_frag_hdr_t* hdr; - mca_btl_base_descriptor_t* des; - int rc, btl_idx; - size_t size, offset, data_remaining = 0; - mca_bml_base_btl_t* bml_btl; - - assert(range->range_send_length != 0); - - if(prev_bytes_remaining == range->range_send_length) - num_fail++; - else - num_fail = 0; - - prev_bytes_remaining = range->range_send_length; - - if( OPAL_UNLIKELY(num_fail == range->range_btl_cnt) ) { - assert(sendreq->req_pending == MCA_PML_CSUM_SEND_PENDING_NONE); - add_request_to_send_pending(sendreq, - MCA_PML_CSUM_SEND_PENDING_SCHEDULE, true); - /* Note that request remains locked. send_request_process_pending() - * function will call shedule_exclusive() directly without taking - * the lock */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - -cannot_pack: - do { - btl_idx = range->range_btl_idx; - if(++range->range_btl_idx == range->range_btl_cnt) - range->range_btl_idx = 0; - } while(!range->range_btls[btl_idx].length); - - bml_btl = range->range_btls[btl_idx].bml_btl; - /* If there is a remaining data from another BTL that was too small - * for converter to pack then send it through another BTL */ - range->range_btls[btl_idx].length += data_remaining; - size = range->range_btls[btl_idx].length; - - /* makes sure that we don't exceed BTL max send size */ - if(bml_btl->btl->btl_max_send_size != 0) { - size_t max_send_size = bml_btl->btl->btl_max_send_size - - sizeof(mca_pml_csum_frag_hdr_t); - - if (size > max_send_size) { - size = max_send_size; - } - } - - /* pack into a descriptor */ - offset = (size_t)range->range_send_offset; - opal_convertor_set_position(&sendreq->req_send.req_base.req_convertor, - &offset); - range->range_send_offset = (uint64_t)offset; - - data_remaining = size; - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_defined, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - mca_bml_base_prepare_src(bml_btl, NULL, - &sendreq->req_send.req_base.req_convertor, - MCA_BTL_NO_ORDER, - sizeof(mca_pml_csum_frag_hdr_t), - &size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK, &des); - MEMCHECKER( - memchecker_call(&opal_memchecker_base_mem_noaccess, - sendreq->req_send.req_base.req_addr, - sendreq->req_send.req_base.req_count, - sendreq->req_send.req_base.req_datatype); - ); - - if( OPAL_UNLIKELY(des == NULL || size == 0) ) { - if(des) { - /* Converter can't pack this chunk. 
Append to another chunk - * from other BTL */ - mca_bml_base_free(bml_btl, des); - range->range_btls[btl_idx].length -= data_remaining; - goto cannot_pack; - } - continue; - } - - des->des_cbfunc = mca_pml_csum_frag_completion; - des->des_cbdata = sendreq; - - /* setup header */ - hdr = (mca_pml_csum_frag_hdr_t*)des->des_src->seg_addr.pval; - hdr->hdr_common.hdr_flags = 0; - hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_FRAG; - hdr->hdr_common.hdr_csum = 0; - hdr->hdr_frag_offset = range->range_send_offset; - hdr->hdr_src_req.pval = sendreq; - hdr->hdr_dst_req = sendreq->req_recv; - hdr->hdr_csum = (size > 0 ? - sendreq->req_send.req_base.req_convertor.checksum : OPAL_CSUM_ZERO); - hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_frag_hdr_t)); - - OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output, - "%s Sending \'frag\' with data csum:0x%x, header csum:0x%04x, size:%lu\n", - OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_csum, hdr->hdr_common.hdr_csum, (unsigned long)size)); - - csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_FRAG, - sendreq->req_send.req_base.req_proc); - -#if OMPI_WANT_PERUSE - PERUSE_TRACE_COMM_OMPI_EVENT(PERUSE_COMM_REQ_XFER_CONTINUE, - &(sendreq->req_send.req_base), size, PERUSE_SEND); -#endif /* OMPI_WANT_PERUSE */ - - /* initiate send - note that this may complete before the call returns */ - rc = mca_bml_base_send(bml_btl, des, MCA_PML_CSUM_HDR_TYPE_FRAG); - if( OPAL_LIKELY(rc >= 0) ) { - /* update state */ - range->range_btls[btl_idx].length -= size; - range->range_send_length -= size; - range->range_send_offset += size; - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, 1); - if(range->range_send_length == 0) { - range = get_next_send_range(sendreq, range); - prev_bytes_remaining = 0; - } - } else { - mca_bml_base_free(bml_btl,des); - } - } - - return OMPI_SUCCESS; -} - - -/** - * An RDMA put operation has completed: - * (1) Update request status and if required set completed - * (2) Send FIN control message to the destination - */ - -static void mca_pml_csum_put_completion( mca_btl_base_module_t* btl, - struct mca_btl_base_endpoint_t* ep, - struct mca_btl_base_descriptor_t* des, - int status ) -{ - mca_pml_csum_rdma_frag_t* frag = (mca_pml_csum_rdma_frag_t*)des->des_cbdata; - mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)frag->rdma_req; - mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context; - - /* check completion status */ - if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) { - /* TSW - FIX */ - OMPI_ERROR_LOG(status); - ompi_rte_abort(-1, NULL); - } - - mca_pml_csum_send_fin(sendreq->req_send.req_base.req_proc, - bml_btl, - frag->rdma_hdr.hdr_rdma.hdr_des, - des->order, 0); - - /* check for request completion */ - OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length); - - send_request_pml_complete_check(sendreq); - - MCA_PML_CSUM_RDMA_FRAG_RETURN(frag); - - MCA_PML_CSUM_PROGRESS_PENDING(bml_btl); -} - -int mca_pml_csum_send_request_put_frag( mca_pml_csum_rdma_frag_t* frag ) -{ - mca_mpool_base_registration_t* reg = NULL; - mca_bml_base_btl_t* bml_btl = frag->rdma_bml; - mca_btl_base_descriptor_t* des; - size_t save_size = frag->rdma_length; - int rc; - - /* setup descriptor */ - mca_bml_base_prepare_src( bml_btl, - reg, - &frag->convertor, - MCA_BTL_NO_ORDER, - 0, - &frag->rdma_length, - MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | - MCA_BTL_DES_FLAGS_PUT, - &des ); - - if( OPAL_UNLIKELY(NULL == des) ) { - if(frag->retries < mca_pml_csum.rdma_put_retries_limit) { - size_t offset = 
(size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset; - frag->rdma_length = save_size; - opal_convertor_set_position(&frag->convertor, &offset); - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - opal_list_append(&mca_pml_csum.rdma_pending, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - } else { - mca_pml_csum_send_request_t *sendreq = - (mca_pml_csum_send_request_t*)frag->rdma_req; - - /* tell receiver to unregister memory */ - mca_pml_csum_send_fin(sendreq->req_send.req_base.req_proc, - bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des, - MCA_BTL_NO_ORDER, 1); - - /* send fragment by copy in/out */ - mca_pml_csum_send_request_copy_in_out(sendreq, - frag->rdma_hdr.hdr_rdma.hdr_rdma_offset, frag->rdma_length); - /* if a pointer to a receive request is not set it means that - * ACK was not yet received. Don't schedule sends before ACK */ - if(NULL != sendreq->req_recv.pval) - mca_pml_csum_send_request_schedule(sendreq); - } - return OMPI_ERR_OUT_OF_RESOURCE; - } - - des->des_dst = (mca_btl_base_segment_t *) frag->rdma_segs; - des->des_dst_cnt = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt; - des->des_cbfunc = mca_pml_csum_put_completion; - des->des_cbdata = frag; - - PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE, - &(((mca_pml_csum_send_request_t*)frag->rdma_req)->req_send.req_base), save_size, PERUSE_SEND ); - - rc = mca_bml_base_put(bml_btl, des); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_bml_base_free(bml_btl, des); - frag->rdma_length = save_size; - if(OMPI_ERR_OUT_OF_RESOURCE == rc) { - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - opal_list_append(&mca_pml_csum.rdma_pending, (opal_list_item_t*)frag); - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - return OMPI_ERR_OUT_OF_RESOURCE; - } else { - /* TSW - FIX */ - OMPI_ERROR_LOG(rc); - ompi_rte_abort(-1, NULL); - } - } - return OMPI_SUCCESS; -} - -/** - * Receiver has scheduled an RDMA operation: - * (1) Allocate an RDMA fragment to maintain the state of the operation - * (2) Call BTL prepare_src to pin/prepare source buffers - * (3) Queue the RDMA put - */ - -void mca_pml_csum_send_request_put( mca_pml_csum_send_request_t* sendreq, - mca_btl_base_module_t* btl, - mca_pml_csum_rdma_hdr_t* hdr ) -{ - mca_bml_base_endpoint_t *bml_endpoint = sendreq->req_endpoint; - mca_pml_csum_rdma_frag_t* frag; - int rc; - size_t i, size = 0; - - if(hdr->hdr_common.hdr_flags & MCA_PML_CSUM_HDR_TYPE_ACK) { - OPAL_THREAD_ADD32(&sendreq->req_state, -1); - } - - MCA_PML_CSUM_RDMA_FRAG_ALLOC(frag, rc); - - if( OPAL_UNLIKELY(NULL == frag) ) { - /* TSW - FIX */ - OMPI_ERROR_LOG(rc); - ompi_rte_abort(-1, NULL); - } - - assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs)); - - /* setup fragment */ - memmove (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt); - - for( i = 0; i < hdr->hdr_seg_cnt; i++ ) { - mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *) ((uintptr_t)(frag->rdma_segs) + i * btl->btl_seg_size); - -#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT - if ((sendreq->req_send.req_base.req_proc->proc_arch & OPAL_ARCH_ISBIGENDIAN) != - (ompi_proc_local()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) { - size += opal_swap_bytes4(seg->seg_len); - } else -#endif - { - size += seg->seg_len; - } - } - - frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl); - frag->rdma_hdr.hdr_rdma = *hdr; - frag->rdma_req = sendreq; - frag->rdma_ep = bml_endpoint; - frag->rdma_length = size; - frag->rdma_state = MCA_PML_CSUM_RDMA_PUT; - frag->reg = NULL; - frag->retries = 0; - - /* lookup the corresponding registration */ - 
for(i=0; ireq_rdma_cnt; i++) { - if(sendreq->req_rdma[i].bml_btl == frag->rdma_bml) { - frag->reg = sendreq->req_rdma[i].btl_reg; - break; - } - } - - /* RDMA writes may proceed in parallel to send and to each other, so - * create clone of the convertor for each RDMA fragment - */ - size = hdr->hdr_rdma_offset; - opal_convertor_clone_with_position(&sendreq->req_send.req_base.req_convertor, - &frag->convertor, 0, &size); - - mca_pml_csum_send_request_put_frag(frag); -} - diff --git a/ompi/mca/pml/csum/pml_csum_sendreq.h b/ompi/mca/pml/csum/pml_csum_sendreq.h deleted file mode 100644 index f2e84840f7..0000000000 --- a/ompi/mca/pml/csum/pml_csum_sendreq.h +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2010 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2009 IBM Corporation. All rights reserved. - * Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_PML_CSUM_SEND_REQUEST_H -#define OMPI_PML_CSUM_SEND_REQUEST_H - -#include "ompi/mca/btl/btl.h" -#include "ompi/mca/pml/base/pml_base_sendreq.h" -#include "ompi/mca/mpool/base/base.h" -#include "pml_csum_comm.h" -#include "pml_csum_hdr.h" -#include "pml_csum_rdma.h" -#include "pml_csum_rdmafrag.h" -#include "opal/datatype/opal_convertor.h" -#include "ompi/mca/bml/bml.h" - -BEGIN_C_DECLS - -typedef enum { - MCA_PML_CSUM_SEND_PENDING_NONE, - MCA_PML_CSUM_SEND_PENDING_SCHEDULE, - MCA_PML_CSUM_SEND_PENDING_START -} mca_pml_csum_send_pending_t; - -struct mca_pml_csum_send_request_t { - mca_pml_base_send_request_t req_send; - mca_bml_base_endpoint_t* req_endpoint; - ompi_ptr_t req_recv; - int32_t req_state; - int32_t req_lock; - bool req_throttle_sends; - size_t req_pipeline_depth; - size_t req_bytes_delivered; - uint32_t req_rdma_cnt; - mca_pml_csum_send_pending_t req_pending; - opal_mutex_t req_send_range_lock; - opal_list_t req_send_ranges; - mca_pml_csum_com_btl_t req_rdma[1]; -}; -typedef struct mca_pml_csum_send_request_t mca_pml_csum_send_request_t; - -OBJ_CLASS_DECLARATION(mca_pml_csum_send_request_t); - -struct mca_pml_csum_send_range_t { - ompi_free_list_item_t base; - uint64_t range_send_offset; - uint64_t range_send_length; - int range_btl_idx; - int range_btl_cnt; - mca_pml_csum_com_btl_t range_btls[1]; -}; -typedef struct mca_pml_csum_send_range_t mca_pml_csum_send_range_t; -OBJ_CLASS_DECLARATION(mca_pml_csum_send_range_t); - -static inline bool lock_send_request(mca_pml_csum_send_request_t *sendreq) -{ - return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1; -} - -static inline bool unlock_send_request(mca_pml_csum_send_request_t *sendreq) -{ - return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0; -} - -static inline void -add_request_to_send_pending(mca_pml_csum_send_request_t* sendreq, - const mca_pml_csum_send_pending_t type, - const bool append) -{ - opal_list_item_t *item = (opal_list_item_t*)sendreq; - - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - sendreq->req_pending = type; - 
if(append) - opal_list_append(&mca_pml_csum.send_pending, item); - else - opal_list_prepend(&mca_pml_csum.send_pending, item); - - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); -} - -static inline mca_pml_csum_send_request_t* -get_request_from_send_pending(mca_pml_csum_send_pending_t *type) -{ - mca_pml_csum_send_request_t *sendreq; - - OPAL_THREAD_LOCK(&mca_pml_csum.lock); - sendreq = (mca_pml_csum_send_request_t*) - opal_list_remove_first(&mca_pml_csum.send_pending); - if(sendreq) { - *type = sendreq->req_pending; - sendreq->req_pending = MCA_PML_CSUM_SEND_PENDING_NONE; - } - OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); - - return sendreq; -} - -#define MCA_PML_CSUM_SEND_REQUEST_ALLOC( comm, \ - dst, \ - sendreq, \ - rc) \ - { \ - ompi_proc_t *proc = ompi_comm_peer_lookup( comm, dst ); \ - ompi_free_list_item_t* item; \ - \ - rc = OMPI_ERR_OUT_OF_RESOURCE; \ - if( OPAL_LIKELY(NULL != proc) ) { \ - rc = OMPI_SUCCESS; \ - OMPI_FREE_LIST_WAIT(&mca_pml_base_send_requests, item, rc); \ - sendreq = (mca_pml_csum_send_request_t*)item; \ - sendreq->req_send.req_base.req_proc = proc; \ - } \ - } - - -#define MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq, \ - buf, \ - count, \ - datatype, \ - dst, \ - tag, \ - comm, \ - sendmode, \ - persistent) \ - { \ - MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \ - buf, \ - count, \ - datatype, \ - dst, \ - tag, \ - comm, \ - sendmode, \ - persistent, \ - 0); /* convertor_flags */ \ - (sendreq)->req_recv.pval = NULL; \ - } - - -static inline void mca_pml_csum_free_rdma_resources(mca_pml_csum_send_request_t* sendreq) -{ - size_t r; - - /* return mpool resources */ - for(r = 0; r < sendreq->req_rdma_cnt; r++) { - mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg; - if( NULL != reg && reg->mpool != NULL ) { - reg->mpool->mpool_deregister(reg->mpool, reg); - } - } - sendreq->req_rdma_cnt = 0; -} - - -/** - * Start a send request. - */ - -#define MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc) \ - do { \ - rc = mca_pml_csum_send_request_start(sendreq); \ - } while (0) - - -/* - * Mark a send request as completed at the MPI level. - */ - -#define MCA_PML_CSUM_SEND_REQUEST_MPI_COMPLETE(sendreq, with_signal) \ -do { \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_SOURCE = \ - (sendreq)->req_send.req_base.req_comm->c_my_rank; \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_TAG = \ - (sendreq)->req_send.req_base.req_tag; \ - (sendreq)->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \ - (sendreq)->req_send.req_base.req_ompi.req_status._ucount = \ - (sendreq)->req_send.req_bytes_packed; \ - ompi_request_complete( &((sendreq)->req_send.req_base.req_ompi), (with_signal) ); \ - \ - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \ - &(sendreq->req_send.req_base), PERUSE_SEND); \ -} while(0) - -/* - * Release resources associated with a request - */ - -#define MCA_PML_CSUM_SEND_REQUEST_RETURN(sendreq) \ - do { \ - /* Let the base handle the reference counts */ \ - MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \ - OMPI_FREE_LIST_RETURN( &mca_pml_base_send_requests, \ - (ompi_free_list_item_t*)sendreq); \ - } while(0) - - -/* - * The PML has completed a send request. Note that this request - * may have been orphaned by the user or have already completed - * at the MPI level. - * This function will never be called directly from the upper level, as it - * should only be an internal call to the PML. 
- * - */ -static inline void -send_request_pml_complete(mca_pml_csum_send_request_t *sendreq) -{ - assert(false == sendreq->req_send.req_base.req_pml_complete); - - if(sendreq->req_send.req_bytes_packed > 0) { - PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END, - &(sendreq->req_send.req_base), PERUSE_SEND); - } - - /* return mpool resources */ - mca_pml_csum_free_rdma_resources(sendreq); - - if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED && - sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) { - mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq); - } - - OPAL_THREAD_LOCK(&ompi_request_lock); - if(false == sendreq->req_send.req_base.req_ompi.req_complete) { - /* Should only be called for long messages (maybe synchronous) */ - MCA_PML_CSUM_SEND_REQUEST_MPI_COMPLETE(sendreq, true); - } - sendreq->req_send.req_base.req_pml_complete = true; - - if(sendreq->req_send.req_base.req_free_called) { - MCA_PML_CSUM_SEND_REQUEST_RETURN(sendreq); - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); -} - -/* returns true if request was completed on PML level */ -static inline bool -send_request_pml_complete_check(mca_pml_csum_send_request_t *sendreq) -{ -#if OPAL_ENABLE_MULTI_THREADS - opal_atomic_rmb(); -#endif - /* if no more events are expected for the request and the whole message is - * already sent and send fragment scheduling isn't running in another - * thread then complete the request on PML level. From now on, if user - * called free on this request, the request structure can be reused for - * another request or if the request is persistent it can be restarted */ - if(sendreq->req_state == 0 && - sendreq->req_bytes_delivered >= sendreq->req_send.req_bytes_packed - && lock_send_request(sendreq)) { - send_request_pml_complete(sendreq); - return true; - } - - return false; -} - -/** - * Schedule additional fragments - */ -int -mca_pml_csum_send_request_schedule_once(mca_pml_csum_send_request_t*); - -static inline int -mca_pml_csum_send_request_schedule_exclusive(mca_pml_csum_send_request_t* sendreq) -{ - int rc; - do { - rc = mca_pml_csum_send_request_schedule_once(sendreq); - if(rc == OMPI_ERR_OUT_OF_RESOURCE) - break; - } while(!unlock_send_request(sendreq)); - - if(OMPI_SUCCESS == rc) - send_request_pml_complete_check(sendreq); - - return rc; -} - -static inline void -mca_pml_csum_send_request_schedule(mca_pml_csum_send_request_t* sendreq) -{ - /* - * Only allow one thread in this routine for a given request. - * However, we cannot block callers on a mutex, so simply keep track - * of the number of times the routine has been called and run through - * the scheduling logic once for every call. 
- */ - - if(!lock_send_request(sendreq)) - return; - - mca_pml_csum_send_request_schedule_exclusive(sendreq); -} - -/** - * Start the specified request - */ - -int mca_pml_csum_send_request_start_buffered( - mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_csum_send_request_start_copy( - mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_csum_send_request_start_prepare( - mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_csum_send_request_start_rdma( - mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size); - -int mca_pml_csum_send_request_start_rndv( - mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl, - size_t size, - int flags); - -static inline int -mca_pml_csum_send_request_start_btl( mca_pml_csum_send_request_t* sendreq, - mca_bml_base_btl_t* bml_btl ) -{ - size_t size = sendreq->req_send.req_bytes_packed; - mca_btl_base_module_t* btl = bml_btl->btl; - size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_csum_hdr_t); - int rc; - - if( OPAL_LIKELY(size <= eager_limit) ) { - switch(sendreq->req_send.req_send_mode) { - case MCA_PML_BASE_SEND_SYNCHRONOUS: - rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size, 0); - break; - case MCA_PML_BASE_SEND_BUFFERED: - rc = mca_pml_csum_send_request_start_copy(sendreq, bml_btl, size); - break; - case MCA_PML_BASE_SEND_COMPLETE: - rc = mca_pml_csum_send_request_start_prepare(sendreq, bml_btl, size); - break; - default: - if (size != 0 && bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { - rc = mca_pml_csum_send_request_start_prepare(sendreq, bml_btl, size); - } else { - rc = mca_pml_csum_send_request_start_copy(sendreq, bml_btl, size); - } - break; - } - } else { - size = eager_limit; - if(OPAL_UNLIKELY(btl->btl_rndv_eager_limit < eager_limit)) - size = btl->btl_rndv_eager_limit; - if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { - rc = mca_pml_csum_send_request_start_buffered(sendreq, bml_btl, size); - } else if - (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) { - unsigned char *base; - opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base ); - - if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_csum_rdma_btls( - sendreq->req_endpoint, - base, - sendreq->req_send.req_bytes_packed, - sendreq->req_rdma))) { - rc = mca_pml_csum_send_request_start_rdma(sendreq, bml_btl, - sendreq->req_send.req_bytes_packed); - if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) { - mca_pml_csum_free_rdma_resources(sendreq); - } - } else { - rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size, - MCA_PML_CSUM_HDR_FLAGS_CONTIG); - } - } else { - rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size, 0); - } - } - - return rc; -} - -static inline int -mca_pml_csum_send_request_start( mca_pml_csum_send_request_t* sendreq ) -{ - mca_pml_csum_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; - mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) - sendreq->req_send.req_base.req_proc->proc_bml; - size_t i; - - if( OPAL_UNLIKELY(endpoint == NULL) ) { - return OMPI_ERR_UNREACH; - } - - sendreq->req_endpoint = endpoint; - sendreq->req_state = 0; - sendreq->req_lock = 0; - sendreq->req_pipeline_depth = 0; - sendreq->req_bytes_delivered = 0; - sendreq->req_pending = MCA_PML_CSUM_SEND_PENDING_NONE; - sendreq->req_send.req_base.req_sequence = 
OPAL_THREAD_ADD32( - &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); - - MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base ); - - for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) { - mca_bml_base_btl_t* bml_btl; - int rc; - - /* select a btl */ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); - rc = mca_pml_csum_send_request_start_btl(sendreq, bml_btl); - if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) ) - return rc; - } - add_request_to_send_pending(sendreq, MCA_PML_CSUM_SEND_PENDING_START, true); - - return OMPI_SUCCESS; -} - -/** - * Initiate a put scheduled by the receiver. - */ - -void mca_pml_csum_send_request_put( mca_pml_csum_send_request_t* sendreq, - mca_btl_base_module_t* btl, - mca_pml_csum_rdma_hdr_t* hdr ); - -int mca_pml_csum_send_request_put_frag(mca_pml_csum_rdma_frag_t* frag); - -/* This function tries to continue sendreq that was stuck because of resource - * unavailability. A sendreq may be added to send_pending list if there is no - * resource to send initial packet or there is not resource to schedule data - * for sending. The reason the sendreq was added to the list is stored inside - * sendreq struct and appropriate operation is retried when resource became - * available. bml_btl passed to the function doesn't represents sendreq - * destination, it represents BTL on which resource was freed, so only this BTL - * should be considered for sending packets */ -void mca_pml_csum_send_request_process_pending(mca_bml_base_btl_t *bml_btl); - -void mca_pml_csum_send_request_copy_in_out(mca_pml_csum_send_request_t *sendreq, - uint64_t send_offset, uint64_t send_length); - -END_C_DECLS - -#endif /* OMPI_PML_CSUM_SEND_REQUEST_H */ diff --git a/ompi/mca/pml/csum/pml_csum_start.c b/ompi/mca/pml/csum/pml_csum_start.c deleted file mode 100644 index eb45a391e4..0000000000 --- a/ompi/mca/pml/csum/pml_csum_start.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "pml_csum.h" -#include "pml_csum_recvreq.h" -#include "pml_csum_sendreq.h" -#include "ompi/memchecker.h" - - -int mca_pml_csum_start(size_t count, ompi_request_t** requests) -{ - int rc; - size_t i; - bool reuse_old_request = true; - - for(i=0; ireq_type) { - continue; - } - - /* If the persistent request is currently active - obtain the - * request lock and verify the status is incomplete. if the - * pml layer has not completed the request - mark the request - * as free called - so that it will be freed when the request - * completes - and create a new request. 
- */ - - reuse_old_request = true; - switch(pml_request->req_ompi.req_state) { - case OMPI_REQUEST_INACTIVE: - if(pml_request->req_pml_complete == true) - break; - /* otherwise fall through */ - case OMPI_REQUEST_ACTIVE: { - - ompi_request_t *request; - OPAL_THREAD_LOCK(&ompi_request_lock); - if (pml_request->req_pml_complete == false) { - /* free request after it completes */ - pml_request->req_free_called = true; - } else { - /* can reuse the existing request */ - OPAL_THREAD_UNLOCK(&ompi_request_lock); - break; - } - - reuse_old_request = false; - /* allocate a new request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: { - mca_pml_base_send_mode_t sendmode = - ((mca_pml_base_send_request_t*)pml_request)->req_send_mode; - rc = mca_pml_csum_isend_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - sendmode, - pml_request->req_comm, - &request); - break; - } - case MCA_PML_REQUEST_RECV: - rc = mca_pml_csum_irecv_init( - pml_request->req_addr, - pml_request->req_count, - pml_request->req_datatype, - pml_request->req_peer, - pml_request->req_tag, - pml_request->req_comm, - &request); - break; - default: - rc = OMPI_ERR_REQUEST; - break; - } - OPAL_THREAD_UNLOCK(&ompi_request_lock); - if(OMPI_SUCCESS != rc) - return rc; - pml_request = (mca_pml_base_request_t*)request; - requests[i] = request; - break; - } - default: - return OMPI_ERR_REQUEST; - } - - /* start the request */ - switch(pml_request->req_type) { - case MCA_PML_REQUEST_SEND: - { - mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)pml_request; - MEMCHECKER( - memchecker_call(&opal_memchecker_base_isdefined, - pml_request->req_addr, pml_request->req_count, - pml_request->req_datatype); - ); - if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) { - size_t offset = 0; - /** - * Reset the convertor in case we're dealing with the original - * request, which when completed do not reset the convertor. - */ - opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor, - &offset ); - } - MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc); - if(rc != OMPI_SUCCESS) - return rc; - break; - } - case MCA_PML_REQUEST_RECV: - { - mca_pml_csum_recv_request_t* recvreq = (mca_pml_csum_recv_request_t*)pml_request; - MCA_PML_CSUM_RECV_REQUEST_START(recvreq); - break; - } - default: - return OMPI_ERR_REQUEST; - } - } - return OMPI_SUCCESS; -} - diff --git a/ompi/mca/pml/csum/post_configure.sh b/ompi/mca/pml/csum/post_configure.sh deleted file mode 100644 index 68d1a0516f..0000000000 --- a/ompi/mca/pml/csum/post_configure.sh +++ /dev/null @@ -1 +0,0 @@ -DIRECT_CALL_HEADER="ompi/mca/pml/csum/pml_csum.h"
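The match, rendezvous, and frag send paths in the hunks above all follow the same sealing pattern: the header's hdr_csum field is zeroed, the user-data checksum recorded by the convertor during packing is stored (OPAL_CSUM_ZERO for empty payloads), and the header itself is then checksummed with opal_csum16() before csum_hdr_hton() converts it for the wire. The self-contained sketch below illustrates that seal-then-verify handshake in isolation. It is only a sketch: the simplified header layout and the trivial additive 16-bit checksum are illustrative assumptions, not the real mca_pml_csum_hdr_t layout or the actual opal_csum16() algorithm.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Simplified stand-in for the PML header; the real structure also carries
     * source rank, sequence number, request pointers, etc. */
    typedef struct {
        uint8_t  hdr_type;         /* MATCH / RNDV / RGET / FRAG ... */
        uint8_t  hdr_flags;
        uint16_t hdr_csum;         /* header checksum, computed with this field zeroed */
        uint16_t hdr_ctx;          /* communicator context id */
        uint16_t hdr_tag;
        uint32_t hdr_payload_csum; /* checksum of the packed user data */
    } example_hdr_t;

    /* Toy 16-bit additive checksum; stands in for opal_csum16(), whose real
     * algorithm is not reproduced here. */
    static uint16_t toy_csum16(const void *buf, size_t len)
    {
        const uint8_t *p = (const uint8_t *)buf;
        uint32_t sum = 0;
        for (size_t i = 0; i < len; i++) {
            sum += p[i];
        }
        return (uint16_t)(sum & 0xffffu);
    }

    /* Sender: zero the checksum field, checksum the whole header, store it. */
    static void seal_header(example_hdr_t *hdr)
    {
        hdr->hdr_csum = 0;
        hdr->hdr_csum = toy_csum16(hdr, sizeof(*hdr));
    }

    /* Receiver: save the transmitted value, recompute over a zeroed field,
     * and compare; a mismatch means the header was corrupted in transit. */
    static int header_ok(example_hdr_t hdr)
    {
        uint16_t received = hdr.hdr_csum;
        hdr.hdr_csum = 0;
        return received == toy_csum16(&hdr, sizeof(hdr));
    }

    int main(void)
    {
        example_hdr_t hdr;

        memset(&hdr, 0, sizeof(hdr));
        hdr.hdr_type = 1;
        hdr.hdr_ctx = 42;
        hdr.hdr_tag = 7;
        hdr.hdr_payload_csum = 0xdeadbeefu;

        seal_header(&hdr);
        printf("header verifies: %s\n", header_ok(hdr) ? "yes" : "no");

        hdr.hdr_tag = 8;   /* simulate corruption after sealing */
        printf("after corruption: %s\n", header_ok(hdr) ? "yes" : "no");
        return 0;
    }

On the receive side the deleted component performed the mirror-image check, recomputing and comparing both the header and payload checksums before a fragment was matched; that verification layer is what distinguished csum from the ob1 PML it was derived from.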