remove csum pml
This commit was SVN r28133.
This commit is contained in:
parent
1370d4569a
commit
b5a2cd1cce
@ -1,66 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
#
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
dist_pkgdata_DATA = \
|
||||
help-mpi-pml-csum.txt
|
||||
|
||||
EXTRA_DIST = post_configure.sh pml_csum_endpoint.c pml_csum_endpoint.h
|
||||
|
||||
csum_sources = \
|
||||
pml_csum.c \
|
||||
pml_csum.h \
|
||||
pml_csum_comm.c \
|
||||
pml_csum_comm.h \
|
||||
pml_csum_component.c \
|
||||
pml_csum_component.h \
|
||||
pml_csum_hdr.h \
|
||||
pml_csum_iprobe.c \
|
||||
pml_csum_irecv.c \
|
||||
pml_csum_isend.c \
|
||||
pml_csum_progress.c \
|
||||
pml_csum_rdma.c \
|
||||
pml_csum_rdma.h \
|
||||
pml_csum_rdmafrag.c \
|
||||
pml_csum_rdmafrag.h \
|
||||
pml_csum_recvfrag.c \
|
||||
pml_csum_recvfrag.h \
|
||||
pml_csum_recvreq.c \
|
||||
pml_csum_recvreq.h \
|
||||
pml_csum_sendreq.c \
|
||||
pml_csum_sendreq.h \
|
||||
pml_csum_start.c
|
||||
|
||||
if MCA_BUILD_ompi_pml_csum_DSO
|
||||
component_noinst =
|
||||
component_install = mca_pml_csum.la
|
||||
else
|
||||
component_noinst = libmca_pml_csum.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_pml_csum_la_SOURCES = $(csum_sources)
|
||||
mca_pml_csum_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_pml_csum_la_SOURCES = $(csum_sources)
|
||||
libmca_pml_csum_la_LDFLAGS = -module -avoid-version
|
@ -1,20 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
[eager_limit_too_small]
|
||||
The "eager limit" MCA parameter in the %s BTL was set to a value which
|
||||
is too low for Open MPI to function properly. Please re-run your job
|
||||
with a higher eager limit value for this BTL; the exact MCA parameter
|
||||
name and its corresponding minimum value is shown below.
|
||||
|
||||
Local host: %s
|
||||
BTL name: %s
|
||||
BTL eager limit value: %d (set via btl_%s_eager_limit)
|
||||
BTL eager limit minimum: %d
|
||||
MCA parameter name: btl_%s_eager_limit
|
@ -1,903 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/util/crc.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/pml/base/base.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/pml/base/base.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/runtime/ompi_cr.h"
|
||||
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_component.h"
|
||||
#include "pml_csum_comm.h"
|
||||
#include "pml_csum_hdr.h"
|
||||
#include "pml_csum_recvfrag.h"
|
||||
#include "pml_csum_sendreq.h"
|
||||
#include "pml_csum_recvreq.h"
|
||||
#include "pml_csum_rdmafrag.h"
|
||||
|
||||
mca_pml_csum_t mca_pml_csum = {
|
||||
{
|
||||
mca_pml_csum_add_procs,
|
||||
mca_pml_csum_del_procs,
|
||||
mca_pml_csum_enable,
|
||||
mca_pml_csum_progress,
|
||||
mca_pml_csum_add_comm,
|
||||
mca_pml_csum_del_comm,
|
||||
mca_pml_csum_irecv_init,
|
||||
mca_pml_csum_irecv,
|
||||
mca_pml_csum_recv,
|
||||
mca_pml_csum_isend_init,
|
||||
mca_pml_csum_isend,
|
||||
mca_pml_csum_send,
|
||||
mca_pml_csum_iprobe,
|
||||
mca_pml_csum_probe,
|
||||
mca_pml_csum_start,
|
||||
mca_pml_csum_improbe,
|
||||
mca_pml_csum_mprobe,
|
||||
mca_pml_csum_imrecv,
|
||||
mca_pml_csum_mrecv,
|
||||
mca_pml_csum_dump,
|
||||
mca_pml_csum_ft_event,
|
||||
65535,
|
||||
INT_MAX
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void mca_pml_csum_error_handler( struct mca_btl_base_module_t* btl,
|
||||
int32_t flags, ompi_proc_t* errproc,
|
||||
char* btlinfo );
|
||||
|
||||
int mca_pml_csum_enable(bool enable)
|
||||
{
|
||||
if( false == enable ) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.lock, opal_mutex_t);
|
||||
|
||||
/* fragments */
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.rdma_frags, ompi_free_list_t);
|
||||
ompi_free_list_init_new( &mca_pml_csum.rdma_frags,
|
||||
sizeof(mca_pml_csum_rdma_frag_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_csum_rdma_frag_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_csum.free_list_num,
|
||||
mca_pml_csum.free_list_max,
|
||||
mca_pml_csum.free_list_inc,
|
||||
NULL );
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.recv_frags, ompi_free_list_t);
|
||||
|
||||
ompi_free_list_init_new( &mca_pml_csum.recv_frags,
|
||||
sizeof(mca_pml_csum_recv_frag_t) + mca_pml_csum.unexpected_limit,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_csum_recv_frag_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_csum.free_list_num,
|
||||
mca_pml_csum.free_list_max,
|
||||
mca_pml_csum.free_list_inc,
|
||||
NULL );
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.pending_pckts, ompi_free_list_t);
|
||||
ompi_free_list_init_new( &mca_pml_csum.pending_pckts,
|
||||
sizeof(mca_pml_csum_pckt_pending_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_csum_pckt_pending_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_csum.free_list_num,
|
||||
mca_pml_csum.free_list_max,
|
||||
mca_pml_csum.free_list_inc,
|
||||
NULL );
|
||||
|
||||
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.buffers, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.send_ranges, ompi_free_list_t);
|
||||
ompi_free_list_init_new( &mca_pml_csum.send_ranges,
|
||||
sizeof(mca_pml_csum_send_range_t) +
|
||||
(mca_pml_csum.max_send_per_range - 1) * sizeof(mca_pml_csum_com_btl_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_csum_send_range_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_csum.free_list_num,
|
||||
mca_pml_csum.free_list_max,
|
||||
mca_pml_csum.free_list_inc,
|
||||
NULL );
|
||||
|
||||
/* pending operations */
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.send_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.recv_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.pckt_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.rdma_pending, opal_list_t);
|
||||
/* missing communicator pending list */
|
||||
OBJ_CONSTRUCT(&mca_pml_csum.non_existing_communicator_pending, opal_list_t);
|
||||
|
||||
/**
|
||||
* If we get here this is the PML who get selected for the run. We
|
||||
* should get ownership for the send and receive requests list, and
|
||||
* initialize them with the size of our own requests.
|
||||
*/
|
||||
ompi_free_list_init_new( &mca_pml_base_send_requests,
|
||||
sizeof(mca_pml_csum_send_request_t) +
|
||||
(mca_pml_csum.max_rdma_per_request - 1) *
|
||||
sizeof(mca_pml_csum_com_btl_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_csum_send_request_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_csum.free_list_num,
|
||||
mca_pml_csum.free_list_max,
|
||||
mca_pml_csum.free_list_inc,
|
||||
NULL );
|
||||
|
||||
ompi_free_list_init_new( &mca_pml_base_recv_requests,
|
||||
sizeof(mca_pml_csum_recv_request_t) +
|
||||
(mca_pml_csum.max_rdma_per_request - 1) *
|
||||
sizeof(mca_pml_csum_com_btl_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_pml_csum_recv_request_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_pml_csum.free_list_num,
|
||||
mca_pml_csum.free_list_max,
|
||||
mca_pml_csum.free_list_inc,
|
||||
NULL );
|
||||
|
||||
mca_pml_csum.enabled = true;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_pml_csum_add_comm(ompi_communicator_t* comm)
|
||||
{
|
||||
/* allocate pml specific comm data */
|
||||
mca_pml_csum_comm_t* pml_comm = OBJ_NEW(mca_pml_csum_comm_t);
|
||||
opal_list_item_t *item, *next_item;
|
||||
mca_pml_csum_recv_frag_t* frag;
|
||||
mca_pml_csum_comm_proc_t* pml_proc;
|
||||
mca_pml_csum_match_hdr_t* hdr;
|
||||
int i;
|
||||
|
||||
if (NULL == pml_comm) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* should never happen, but it was, so check */
|
||||
if (comm->c_contextid > mca_pml_csum.super.pml_max_contextid) {
|
||||
OBJ_RELEASE(pml_comm);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
mca_pml_csum_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
|
||||
comm->c_pml_comm = pml_comm;
|
||||
|
||||
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
|
||||
pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i);
|
||||
OBJ_RETAIN(pml_comm->procs[i].ompi_proc);
|
||||
}
|
||||
/* Grab all related messages from the non_existing_communicator pending queue */
|
||||
for( item = opal_list_get_first(&mca_pml_csum.non_existing_communicator_pending);
|
||||
item != opal_list_get_end(&mca_pml_csum.non_existing_communicator_pending);
|
||||
item = next_item ) {
|
||||
frag = (mca_pml_csum_recv_frag_t*)item;
|
||||
next_item = opal_list_get_next(item);
|
||||
hdr = &frag->hdr.hdr_match;
|
||||
|
||||
/* Is this fragment for the current communicator ? */
|
||||
if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid )
|
||||
continue;
|
||||
|
||||
/* As we now know we work on a fragment for this communicator
|
||||
* we should remove it from the
|
||||
* non_existing_communicator_pending list. */
|
||||
opal_list_remove_item( &mca_pml_csum.non_existing_communicator_pending,
|
||||
item );
|
||||
|
||||
add_fragment_to_unexpected:
|
||||
|
||||
/* We generate the MSG_ARRIVED event as soon as the PML is aware
|
||||
* of a matching fragment arrival. Independing if it is received
|
||||
* on the correct order or not. This will allow the tools to
|
||||
* figure out if the messages are not received in the correct
|
||||
* order (if multiple network interfaces).
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* There is no matching to be done, and no lock to be held on the communicator as
|
||||
* we know at this point that the communicator has not yet been returned to the user.
|
||||
* The only required protection is around the non_existing_communicator_pending queue.
|
||||
* We just have to push the fragment into the unexpected list of the corresponding
|
||||
* proc, or into the out-of-order (cant_match) list.
|
||||
*/
|
||||
pml_proc = &(pml_comm->procs[hdr->hdr_src]);
|
||||
|
||||
if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) {
|
||||
/* We're now expecting the next sequence number. */
|
||||
pml_proc->expected_sequence++;
|
||||
opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag );
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
/* And now the ugly part. As some fragments can be inserted in the cant_match list,
|
||||
* every time we succesfully add a fragment in the unexpected list we have to make
|
||||
* sure the next one is not in the cant_match. Otherwise, we will endup in a deadlock
|
||||
* situation as the cant_match is only checked when a new fragment is received from
|
||||
* the network.
|
||||
*/
|
||||
for(frag = (mca_pml_csum_recv_frag_t *)opal_list_get_first(&pml_proc->frags_cant_match);
|
||||
frag != (mca_pml_csum_recv_frag_t *)opal_list_get_end(&pml_proc->frags_cant_match);
|
||||
frag = (mca_pml_csum_recv_frag_t *)opal_list_get_next(frag)) {
|
||||
hdr = &frag->hdr.hdr_match;
|
||||
/* If the message has the next expected seq from that proc... */
|
||||
if(hdr->hdr_seq != pml_proc->expected_sequence)
|
||||
continue;
|
||||
|
||||
opal_list_remove_item(&pml_proc->frags_cant_match, (opal_list_item_t*)frag);
|
||||
goto add_fragment_to_unexpected;
|
||||
}
|
||||
} else {
|
||||
opal_list_append( &pml_proc->frags_cant_match, (opal_list_item_t*)frag );
|
||||
}
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_pml_csum_del_comm(ompi_communicator_t* comm)
|
||||
{
|
||||
mca_pml_csum_comm_t* pml_comm = comm->c_pml_comm;
|
||||
int i;
|
||||
|
||||
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
|
||||
OBJ_RELEASE(pml_comm->procs[i].ompi_proc);
|
||||
}
|
||||
OBJ_RELEASE(comm->c_pml_comm);
|
||||
comm->c_pml_comm = NULL;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* For each proc setup a datastructure that indicates the BTLs
|
||||
* that can be used to reach the destination.
|
||||
*
|
||||
*/
|
||||
|
||||
int mca_pml_csum_add_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
{
|
||||
opal_bitmap_t reachable;
|
||||
int rc;
|
||||
size_t i;
|
||||
opal_list_item_t *item;
|
||||
opal_convertor_t *local_convertor;
|
||||
|
||||
if(nprocs == 0)
|
||||
return OMPI_SUCCESS;
|
||||
/* Create a convertor for processes on the same node &
|
||||
disable checksum computation for local communication */
|
||||
local_convertor = opal_convertor_create(ompi_proc_local()->proc_arch, 0);
|
||||
local_convertor->flags &= ~CONVERTOR_WITH_CHECKSUM;
|
||||
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
/* we don't have any endpoint data we need to cache on the
|
||||
ompi_proc_t, so set proc_pml to NULL */
|
||||
procs[i]->proc_pml = NULL;
|
||||
/* if the proc isn't local, tell the convertor to
|
||||
* checksum the data
|
||||
*/
|
||||
if (!OPAL_PROC_ON_LOCAL_NODE(procs[i]->proc_flags)) {
|
||||
procs[i]->proc_convertor->flags |= CONVERTOR_WITH_CHECKSUM;
|
||||
} else {
|
||||
OBJ_RELEASE(procs[i]->proc_convertor);
|
||||
procs[i]->proc_convertor = local_convertor;
|
||||
OBJ_RETAIN(local_convertor);
|
||||
}
|
||||
}
|
||||
/* Decrement reference count by one, as we increment it twice for ourselves */
|
||||
OBJ_RELEASE(local_convertor);
|
||||
|
||||
OBJ_CONSTRUCT(&reachable, opal_bitmap_t);
|
||||
rc = opal_bitmap_init(&reachable, (int)nprocs);
|
||||
if(OMPI_SUCCESS != rc)
|
||||
return rc;
|
||||
|
||||
/*
|
||||
* JJH: Disable this in FT enabled builds since
|
||||
* we use a wrapper PML. It will cause this check to
|
||||
* return failure as all processes will return the wrapper PML
|
||||
* component in use instead of the wrapped PML component underneath.
|
||||
*/
|
||||
#if OPAL_ENABLE_FT_CR == 0
|
||||
/* make sure remote procs are using the same PML as us */
|
||||
if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("csum",
|
||||
procs,
|
||||
nprocs))) {
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
||||
rc = mca_bml.bml_add_procs( nprocs,
|
||||
procs,
|
||||
&reachable );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
/* Check that values supplied by all initialized btls will work
|
||||
for us. Note that this is the list of all initialized BTLs,
|
||||
not the ones used for the just added procs. This is a little
|
||||
overkill and inaccurate, as we may end up not using the BTL in
|
||||
question and all add_procs calls after the first one are
|
||||
duplicating an already completed check. But the final
|
||||
initialization of the PML occurs before the final
|
||||
initialization of the BTLs, and iterating through the in-use
|
||||
BTLs requires iterating over the procs, as the BML does not
|
||||
expose all currently in use btls. */
|
||||
|
||||
for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ;
|
||||
item != opal_list_get_end(&mca_btl_base_modules_initialized) ;
|
||||
item = opal_list_get_next(item)) {
|
||||
mca_btl_base_selected_module_t *sm =
|
||||
(mca_btl_base_selected_module_t*) item;
|
||||
if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_csum_hdr_t)) {
|
||||
opal_show_help("help-mpi-pml-csum.txt", "eager_limit_too_small",
|
||||
true,
|
||||
sm->btl_component->btl_version.mca_component_name,
|
||||
ompi_process_info.nodename,
|
||||
sm->btl_component->btl_version.mca_component_name,
|
||||
sm->btl_module->btl_eager_limit,
|
||||
sm->btl_component->btl_version.mca_component_name,
|
||||
sizeof(mca_pml_csum_hdr_t),
|
||||
sm->btl_component->btl_version.mca_component_name);
|
||||
rc = OMPI_ERR_BAD_PARAM;
|
||||
goto cleanup_and_return;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* TODO: Move these callback registration to another place */
|
||||
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_MATCH,
|
||||
mca_pml_csum_recv_frag_callback_match,
|
||||
NULL );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_RNDV,
|
||||
mca_pml_csum_recv_frag_callback_rndv,
|
||||
NULL );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_RGET,
|
||||
mca_pml_csum_recv_frag_callback_rget,
|
||||
NULL );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_ACK,
|
||||
mca_pml_csum_recv_frag_callback_ack,
|
||||
NULL );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_FRAG,
|
||||
mca_pml_csum_recv_frag_callback_frag,
|
||||
NULL );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_PUT,
|
||||
mca_pml_csum_recv_frag_callback_put,
|
||||
NULL );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_FIN,
|
||||
mca_pml_csum_recv_frag_callback_fin,
|
||||
NULL );
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
/* register error handlers */
|
||||
rc = mca_bml.bml_register_error(mca_pml_csum_error_handler);
|
||||
if(OMPI_SUCCESS != rc)
|
||||
goto cleanup_and_return;
|
||||
|
||||
cleanup_and_return:
|
||||
OBJ_DESTRUCT(&reachable);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* iterate through each proc and notify any PTLs associated
|
||||
* with the proc that it is/has gone away
|
||||
*/
|
||||
|
||||
int mca_pml_csum_del_procs(ompi_proc_t** procs, size_t nprocs)
|
||||
{
|
||||
return mca_bml.bml_del_procs(nprocs, procs);
|
||||
}
|
||||
|
||||
/*
|
||||
* diagnostics
|
||||
*/
|
||||
|
||||
int mca_pml_csum_dump(struct ompi_communicator_t* comm, int verbose)
|
||||
{
|
||||
struct mca_pml_comm_t* pml_comm = comm->c_pml_comm;
|
||||
int i;
|
||||
|
||||
/* iterate through all procs on communicator */
|
||||
for( i = 0; i < (int)pml_comm->num_procs; i++ ) {
|
||||
mca_pml_csum_comm_proc_t* proc = &pml_comm->procs[i];
|
||||
mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_bml;
|
||||
size_t n;
|
||||
|
||||
opal_output(0, "[Rank %d]\n", i);
|
||||
/* dump all receive queues */
|
||||
|
||||
/* dump all btls */
|
||||
for(n=0; n<ep->btl_eager.arr_size; n++) {
|
||||
mca_bml_base_btl_t* bml_btl = &ep->btl_eager.bml_btls[n];
|
||||
bml_btl->btl->btl_dump(bml_btl->btl, bml_btl->btl_endpoint, verbose);
|
||||
}
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_pml_csum_fin_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
{
|
||||
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||
|
||||
/* check for pending requests */
|
||||
MCA_PML_CSUM_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Send an FIN to the peer. If we fail to send this ack (no more available
|
||||
* fragments or the send failed) this function automatically add the FIN
|
||||
* to the list of pending FIN, Which guarantee that the FIN will be sent
|
||||
* later.
|
||||
*/
|
||||
int mca_pml_csum_send_fin( ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
ompi_ptr_t hdr_des,
|
||||
uint8_t order,
|
||||
uint32_t status )
|
||||
{
|
||||
mca_btl_base_descriptor_t* fin;
|
||||
mca_pml_csum_fin_hdr_t* hdr;
|
||||
int rc;
|
||||
|
||||
mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_csum_fin_hdr_t),
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
|
||||
if(NULL == fin) {
|
||||
MCA_PML_CSUM_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
fin->des_cbfunc = mca_pml_csum_fin_completion;
|
||||
fin->des_cbdata = NULL;
|
||||
|
||||
/* fill in header */
|
||||
hdr = (mca_pml_csum_fin_hdr_t*)fin->des_src->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_FIN;
|
||||
hdr->hdr_common.hdr_csum = 0;
|
||||
hdr->hdr_des = hdr_des;
|
||||
hdr->hdr_fail = status;
|
||||
|
||||
hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t));
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
|
||||
"%s: Sending \'FIN\' with header csum:0x%04x\n",
|
||||
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_common.hdr_csum));
|
||||
|
||||
csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_FIN, proc);
|
||||
|
||||
/* queue request */
|
||||
rc = mca_bml_base_send( bml_btl,
|
||||
fin,
|
||||
MCA_PML_CSUM_HDR_TYPE_FIN );
|
||||
if( OPAL_LIKELY( rc >= 0 ) ) {
|
||||
if( OPAL_LIKELY( 1 == rc ) ) {
|
||||
MCA_PML_CSUM_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
mca_bml_base_free(bml_btl, fin);
|
||||
MCA_PML_CSUM_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
||||
{
|
||||
mca_pml_csum_pckt_pending_t *pckt;
|
||||
int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_csum.pckt_pending);
|
||||
|
||||
for(i = 0; i < s; i++) {
|
||||
mca_bml_base_btl_t *send_dst = NULL;
|
||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||
pckt = (mca_pml_csum_pckt_pending_t*)
|
||||
opal_list_remove_first(&mca_pml_csum.pckt_pending);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
|
||||
if(NULL == pckt)
|
||||
break;
|
||||
if(pckt->bml_btl != NULL &&
|
||||
pckt->bml_btl->btl == bml_btl->btl) {
|
||||
send_dst = pckt->bml_btl;
|
||||
} else {
|
||||
send_dst = mca_bml_base_btl_array_find(
|
||||
&pckt->proc->proc_bml->btl_eager, bml_btl->btl);
|
||||
}
|
||||
if(NULL == send_dst) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||
opal_list_append(&mca_pml_csum.pckt_pending,
|
||||
(opal_list_item_t*)pckt);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
switch(pckt->hdr.hdr_common.hdr_type) {
|
||||
case MCA_PML_CSUM_HDR_TYPE_ACK:
|
||||
rc = mca_pml_csum_recv_request_ack_send_btl(pckt->proc,
|
||||
send_dst,
|
||||
pckt->hdr.hdr_ack.hdr_src_req.lval,
|
||||
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
||||
pckt->hdr.hdr_ack.hdr_send_offset,
|
||||
pckt->hdr.hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA);
|
||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||
opal_list_append(&mca_pml_csum.pckt_pending,
|
||||
(opal_list_item_t*)pckt);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_FIN:
|
||||
rc = mca_pml_csum_send_fin(pckt->proc, send_dst,
|
||||
pckt->hdr.hdr_fin.hdr_des,
|
||||
pckt->order,
|
||||
pckt->hdr.hdr_fin.hdr_fail);
|
||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "[%s:%d] wrong header type\n",
|
||||
__FILE__, __LINE__);
|
||||
break;
|
||||
}
|
||||
/* We're done with this packet, return it back to the free list */
|
||||
MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt);
|
||||
}
|
||||
}
|
||||
|
||||
void mca_pml_csum_process_pending_rdma(void)
|
||||
{
|
||||
mca_pml_csum_rdma_frag_t* frag;
|
||||
int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_csum.rdma_pending);
|
||||
|
||||
for(i = 0; i < s; i++) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||
frag = (mca_pml_csum_rdma_frag_t*)
|
||||
opal_list_remove_first(&mca_pml_csum.rdma_pending);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
|
||||
if(NULL == frag)
|
||||
break;
|
||||
if(frag->rdma_state == MCA_PML_CSUM_RDMA_PUT) {
|
||||
frag->retries++;
|
||||
rc = mca_pml_csum_send_request_put_frag(frag);
|
||||
} else {
|
||||
rc = mca_pml_csum_recv_request_get_frag(frag);
|
||||
}
|
||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void mca_pml_csum_error_handler(
|
||||
struct mca_btl_base_module_t* btl, int32_t flags,
|
||||
ompi_proc_t* errproc, char* btlinfo ) {
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 0
|
||||
int mca_pml_csum_ft_event( int state ) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#else
|
||||
int mca_pml_csum_ft_event( int state )
|
||||
{
|
||||
static bool first_continue_pass = false;
|
||||
ompi_proc_t** procs = NULL;
|
||||
size_t num_procs;
|
||||
int ret, p;
|
||||
ompi_rte_collective_t *coll, *modex;
|
||||
|
||||
coll = OBJ_NEW(ompi_rte_collective_t);
|
||||
coll->id = ompi_process_info.peer_init_barrier;
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
if( opal_cr_timing_barrier_enabled ) {
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1);
|
||||
ompi_rte_barrier(coll);
|
||||
ORTE_WAIT_FOR_COMPLETION(coll->active);
|
||||
}
|
||||
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0);
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
first_continue_pass = !first_continue_pass;
|
||||
|
||||
if( !first_continue_pass ) {
|
||||
if( opal_cr_timing_barrier_enabled ) {
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0);
|
||||
ompi_rte_barrier(coll);
|
||||
ORTE_WAIT_FOR_COMPLETION(coll->active);
|
||||
}
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2);
|
||||
}
|
||||
|
||||
if( orte_cr_continue_like_restart && !first_continue_pass ) {
|
||||
/*
|
||||
* Get a list of processes
|
||||
*/
|
||||
procs = ompi_proc_all(&num_procs);
|
||||
if(NULL == procs) {
|
||||
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto clean;
|
||||
}
|
||||
|
||||
/*
|
||||
* Refresh the proc structure, and publish our proc info in the modex.
|
||||
* NOTE: Do *not* call ompi_proc_finalize as there are many places in
|
||||
* the code that point to indv. procs in this strucutre. For our
|
||||
* needs here we only need to fix up the modex, bml and pml
|
||||
* references.
|
||||
*/
|
||||
if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) {
|
||||
opal_output(0,
|
||||
"pml:csum: ft_event(Restart): proc_refresh Failed %d",
|
||||
ret);
|
||||
for(p = 0; p < (int)num_procs; ++p) {
|
||||
OBJ_RELEASE(procs[p]);
|
||||
}
|
||||
free (procs);
|
||||
goto clean;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_RESTART_PRE == state ) {
|
||||
/* Nothing here */
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state ) {
|
||||
/*
|
||||
* Get a list of processes
|
||||
*/
|
||||
procs = ompi_proc_all(&num_procs);
|
||||
if(NULL == procs) {
|
||||
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto clean;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean out the modex information since it is invalid now.
|
||||
* ompi_rte_purge_proc_attrs();
|
||||
* This happens at the ORTE level, so doing it again here will cause
|
||||
* some issues with socket caching.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Refresh the proc structure, and publish our proc info in the modex.
|
||||
* NOTE: Do *not* call ompi_proc_finalize as there are many places in
|
||||
* the code that point to indv. procs in this strucutre. For our
|
||||
* needs here we only need to fix up the modex, bml and pml
|
||||
* references.
|
||||
*/
|
||||
if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) {
|
||||
opal_output(0,
|
||||
"pml:csum: ft_event(Restart): proc_refresh Failed %d",
|
||||
ret);
|
||||
for(p = 0; p < (int)num_procs; ++p) {
|
||||
OBJ_RELEASE(procs[p]);
|
||||
}
|
||||
free (procs);
|
||||
goto clean;
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
/* Call the BML
|
||||
* BML is expected to call ft_event in
|
||||
* - BTL(s)
|
||||
* - MPool(s)
|
||||
*/
|
||||
if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) {
|
||||
opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n",
|
||||
ret);
|
||||
}
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1);
|
||||
|
||||
if( opal_cr_timing_barrier_enabled ) {
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0);
|
||||
/* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
if( !first_continue_pass ) {
|
||||
if( opal_cr_timing_barrier_enabled ) {
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1);
|
||||
ompi_rte_barrier(coll);
|
||||
ORTE_WAIT_FOR_COMPLETION(coll->active);
|
||||
}
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3);
|
||||
}
|
||||
|
||||
if( orte_cr_continue_like_restart && !first_continue_pass ) {
|
||||
/*
|
||||
* Exchange the modex information once again.
|
||||
* BTLs will have republished their modex information.
|
||||
*/
|
||||
modex = OBJ_NEW(ompi_rte_collective_t);
|
||||
modex->id = ompi_process_info.peer_modex;
|
||||
if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) {
|
||||
opal_output(0,
|
||||
"pml:csum: ft_event(Restart): Failed orte_grpcomm.modex() = %d",
|
||||
ret);
|
||||
OBJ_RELEASE(modex);
|
||||
goto clean;
|
||||
}
|
||||
ORTE_WAIT_FOR_COMPLETION(modex->active);
|
||||
OBJ_RELEASE(modex);
|
||||
|
||||
/*
|
||||
* Startup the PML stack now that the modex is running again
|
||||
* Add the new procs (BTLs redo modex recv's)
|
||||
*/
|
||||
if( OMPI_SUCCESS != (ret = mca_pml_csum_add_procs(procs, num_procs) ) ) {
|
||||
opal_output(0, "pml:csum: ft_event(Restart): Failed in add_procs (%d)", ret);
|
||||
goto clean;
|
||||
}
|
||||
|
||||
/* Is this barrier necessary ? JJH */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
|
||||
opal_output(0, "pml:csum: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
|
||||
goto clean;
|
||||
}
|
||||
ORTE_WAIT_FOR_COMPLETION(coll->active);
|
||||
|
||||
if( NULL != procs ) {
|
||||
for(p = 0; p < (int)num_procs; ++p) {
|
||||
OBJ_RELEASE(procs[p]);
|
||||
}
|
||||
free(procs);
|
||||
procs = NULL;
|
||||
}
|
||||
}
|
||||
if( !first_continue_pass ) {
|
||||
if( opal_cr_timing_barrier_enabled ) {
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2);
|
||||
ompi_rte_barrier(coll);
|
||||
ORTE_WAIT_FOR_COMPLETION(coll->active);
|
||||
}
|
||||
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1);
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_RESTART_PRE == state ) {
|
||||
/* Nothing here */
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state ) {
|
||||
/*
|
||||
* Exchange the modex information once again.
|
||||
* BTLs will have republished their modex information.
|
||||
*/
|
||||
modex = OBJ_NEW(ompi_rte_collective_t);
|
||||
modex->id = ompi_process_info.peer_modex;
|
||||
if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(NULL))) {
|
||||
opal_output(0,
|
||||
"pml:csum: ft_event(Restart): Failed orte_grpcomm.modex() = %d",
|
||||
ret);
|
||||
OBJ_RELEASE(modex);
|
||||
goto clean;
|
||||
}
|
||||
ORTE_WAIT_FOR_COMPLETION(modex->active);
|
||||
OBJ_RELEASE(modex);
|
||||
|
||||
/*
|
||||
* Startup the PML stack now that the modex is running again
|
||||
* Add the new procs (BTLs redo modex recv's)
|
||||
*/
|
||||
if( OMPI_SUCCESS != (ret = mca_pml_csum_add_procs(procs, num_procs) ) ) {
|
||||
opal_output(0, "pml:csum: ft_event(Restart): Failed in add_procs (%d)", ret);
|
||||
goto clean;
|
||||
}
|
||||
|
||||
/* Is this barrier necessary ? JJH */
|
||||
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
|
||||
opal_output(0, "pml:csum: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
|
||||
goto clean;
|
||||
}
|
||||
ORTE_WAIT_FOR_COMPLETION(coll->active);
|
||||
|
||||
if( NULL != procs ) {
|
||||
for(p = 0; p < (int)num_procs; ++p) {
|
||||
OBJ_RELEASE(procs[p]);
|
||||
}
|
||||
free(procs);
|
||||
procs = NULL;
|
||||
}
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
ret = OMPI_SUCCESS;
|
||||
|
||||
clean:
|
||||
OBJ_RELEASE(coll);
|
||||
return ret;
|
||||
}
|
||||
#endif /* OPAL_ENABLE_FT_CR */
|
||||
|
||||
int mca_pml_csum_com_btl_comp(const void *v1, const void *v2)
|
||||
{
|
||||
const mca_pml_csum_com_btl_t *b1 = (const mca_pml_csum_com_btl_t *) v1;
|
||||
const mca_pml_csum_com_btl_t *b2 = (const mca_pml_csum_com_btl_t *) v2;
|
||||
|
||||
if(b1->bml_btl->btl_weight < b2->bml_btl->btl_weight)
|
||||
return 1;
|
||||
if(b1->bml_btl->btl_weight > b2->bml_btl->btl_weight)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,361 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_CSUM_H
|
||||
#define MCA_PML_CSUM_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/pml/base/pml_base_request.h"
|
||||
#include "ompi/mca/pml/base/pml_base_bsend.h"
|
||||
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "pml_csum_hdr.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/allocator/base/base.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* CSUM PML module
|
||||
*/
|
||||
|
||||
struct mca_pml_csum_t {
|
||||
mca_pml_base_module_t super;
|
||||
|
||||
int priority;
|
||||
int free_list_num; /* initial size of free list */
|
||||
int free_list_max; /* maximum size of free list */
|
||||
int free_list_inc; /* number of elements to grow free list */
|
||||
size_t send_pipeline_depth;
|
||||
size_t recv_pipeline_depth;
|
||||
size_t rdma_put_retries_limit;
|
||||
int max_rdma_per_request;
|
||||
int max_send_per_range;
|
||||
bool leave_pinned;
|
||||
int leave_pinned_pipeline;
|
||||
|
||||
/* lock queue access */
|
||||
opal_mutex_t lock;
|
||||
|
||||
/* free lists */
|
||||
ompi_free_list_t rdma_frags;
|
||||
ompi_free_list_t recv_frags;
|
||||
ompi_free_list_t pending_pckts;
|
||||
ompi_free_list_t buffers;
|
||||
ompi_free_list_t send_ranges;
|
||||
|
||||
/* list of pending operations */
|
||||
opal_list_t pckt_pending;
|
||||
opal_list_t send_pending;
|
||||
opal_list_t recv_pending;
|
||||
opal_list_t rdma_pending;
|
||||
/* List of pending fragments without a matching communicator */
|
||||
opal_list_t non_existing_communicator_pending;
|
||||
bool enabled;
|
||||
char* allocator_name;
|
||||
mca_allocator_base_module_t* allocator;
|
||||
uint32_t unexpected_limit;
|
||||
};
|
||||
typedef struct mca_pml_csum_t mca_pml_csum_t;
|
||||
|
||||
extern mca_pml_csum_t mca_pml_csum;
|
||||
extern int mca_pml_csum_output;
|
||||
|
||||
/*
|
||||
* PML interface functions.
|
||||
*/
|
||||
|
||||
extern int mca_pml_csum_add_comm(
|
||||
struct ompi_communicator_t* comm
|
||||
);
|
||||
|
||||
extern int mca_pml_csum_del_comm(
|
||||
struct ompi_communicator_t* comm
|
||||
);
|
||||
|
||||
extern int mca_pml_csum_add_procs(
|
||||
struct ompi_proc_t **procs,
|
||||
size_t nprocs
|
||||
);
|
||||
|
||||
extern int mca_pml_csum_del_procs(
|
||||
struct ompi_proc_t **procs,
|
||||
size_t nprocs
|
||||
);
|
||||
|
||||
extern int mca_pml_csum_enable( bool enable );
|
||||
|
||||
extern int mca_pml_csum_progress(void);
|
||||
|
||||
extern int mca_pml_csum_iprobe( int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
int *matched,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_csum_probe( int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_csum_improbe( int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
int *matched,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_csum_mprobe( int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_csum_isend_init( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_csum_isend( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_csum_send( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm );
|
||||
|
||||
extern int mca_pml_csum_irecv_init( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_csum_irecv( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_csum_recv( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_csum_imrecv( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
struct ompi_message_t **message,
|
||||
struct ompi_request_t **request );
|
||||
|
||||
extern int mca_pml_csum_mrecv( void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status );
|
||||
|
||||
extern int mca_pml_csum_dump( struct ompi_communicator_t* comm,
|
||||
int verbose );
|
||||
|
||||
extern int mca_pml_csum_start( size_t count,
|
||||
ompi_request_t** requests );
|
||||
|
||||
extern int mca_pml_csum_ft_event( int state );
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
struct mca_pml_csum_pckt_pending_t {
|
||||
ompi_free_list_item_t super;
|
||||
ompi_proc_t* proc;
|
||||
mca_pml_csum_hdr_t hdr;
|
||||
struct mca_bml_base_btl_t *bml_btl;
|
||||
uint8_t order;
|
||||
};
|
||||
typedef struct mca_pml_csum_pckt_pending_t mca_pml_csum_pckt_pending_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pml_csum_pckt_pending_t);
|
||||
|
||||
#define MCA_PML_CSUM_PCKT_PENDING_ALLOC(pckt,rc) \
|
||||
do { \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_WAIT(&mca_pml_csum.pending_pckts, item, rc); \
|
||||
pckt = (mca_pml_csum_pckt_pending_t*)item; \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt) \
|
||||
do { \
|
||||
/* return packet */ \
|
||||
OMPI_FREE_LIST_RETURN(&mca_pml_csum.pending_pckts, \
|
||||
(ompi_free_list_item_t*)pckt); \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_CSUM_ADD_FIN_TO_PENDING(P, D, B, O, S) \
|
||||
do { \
|
||||
mca_pml_csum_pckt_pending_t *_pckt; \
|
||||
int _rc; \
|
||||
\
|
||||
MCA_PML_CSUM_PCKT_PENDING_ALLOC(_pckt,_rc); \
|
||||
_pckt->hdr.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_FIN; \
|
||||
_pckt->hdr.hdr_fin.hdr_des = (D); \
|
||||
_pckt->hdr.hdr_fin.hdr_fail = (S); \
|
||||
_pckt->proc = (P); \
|
||||
_pckt->bml_btl = (B); \
|
||||
_pckt->order = (O); \
|
||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock); \
|
||||
opal_list_append(&mca_pml_csum.pckt_pending, \
|
||||
(opal_list_item_t*)_pckt); \
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); \
|
||||
} while(0)
|
||||
|
||||
|
||||
int mca_pml_csum_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||
ompi_ptr_t hdr_des, uint8_t order, uint32_t status);
|
||||
|
||||
/* This function tries to resend FIN/ACK packets from pckt_pending queue.
|
||||
* Packets are added to the queue when sending of FIN or ACK is failed due to
|
||||
* resource unavailability. bml_btl passed to the function doesn't represents
|
||||
* packet's destination, it represents BTL on which resource was freed, so only
|
||||
* this BTL should be considered for resending packets */
|
||||
void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl);
|
||||
|
||||
/* This function retries failed PUT/GET operations on frag. When RDMA operation
|
||||
* cannot be accomplished for some reason, frag is put on the rdma_pending list.
|
||||
* Later the operation is retried. The destination of RDMA operation is stored
|
||||
* inside the frag structure */
|
||||
void mca_pml_csum_process_pending_rdma(void);
|
||||
|
||||
#define MCA_PML_CSUM_PROGRESS_PENDING(bml_btl) \
|
||||
do { \
|
||||
if(opal_list_get_size(&mca_pml_csum.pckt_pending)) \
|
||||
mca_pml_csum_process_pending_packets(bml_btl); \
|
||||
if(opal_list_get_size(&mca_pml_csum.recv_pending)) \
|
||||
mca_pml_csum_recv_request_process_pending(); \
|
||||
if(opal_list_get_size(&mca_pml_csum.send_pending)) \
|
||||
mca_pml_csum_send_request_process_pending(bml_btl); \
|
||||
if(opal_list_get_size(&mca_pml_csum.rdma_pending)) \
|
||||
mca_pml_csum_process_pending_rdma(); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Compute the total number of bytes on supplied descriptor
|
||||
*/
|
||||
static inline int mca_pml_csum_compute_segment_length (size_t seg_size, void *segments, size_t count,
|
||||
size_t hdrlen) {
|
||||
size_t i, length;
|
||||
|
||||
for (i = 0, length = -hdrlen ; i < count ; ++i) {
|
||||
mca_btl_base_segment_t *segment =
|
||||
(mca_btl_base_segment_t *)((char *) segments + i * seg_size);
|
||||
|
||||
length += segment->seg_len;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
static inline int mca_pml_csum_compute_segment_length_base (mca_btl_base_segment_t *segments,
|
||||
size_t count, size_t hdrlen) {
|
||||
size_t i, length;
|
||||
|
||||
for (i = 0, length = -hdrlen ; i < count ; ++i) {
|
||||
length += segments[i].seg_len;
|
||||
}
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
/* represent BTL chosen for sending request */
|
||||
struct mca_pml_csum_com_btl_t {
|
||||
mca_bml_base_btl_t *bml_btl;
|
||||
struct mca_mpool_base_registration_t* btl_reg;
|
||||
size_t length;
|
||||
};
|
||||
typedef struct mca_pml_csum_com_btl_t mca_pml_csum_com_btl_t;
|
||||
|
||||
int mca_pml_csum_com_btl_comp(const void *v1, const void *v2);
|
||||
|
||||
/* Calculate what percentage of a message to send through each BTL according to
|
||||
* relative weight */
|
||||
static inline void
|
||||
mca_pml_csum_calc_weighted_length( mca_pml_csum_com_btl_t *btls, int num_btls, size_t size,
|
||||
double weight_total )
|
||||
{
|
||||
int i;
|
||||
size_t length_left;
|
||||
|
||||
/* shortcut for common case for only one BTL */
|
||||
if( OPAL_LIKELY(1 == num_btls) ) {
|
||||
btls[0].length = size;
|
||||
return;
|
||||
}
|
||||
|
||||
/* sort BTLs according of their weights so BTLs with smaller weight will
|
||||
* not hijack all of the traffic */
|
||||
qsort( btls, num_btls, sizeof(mca_pml_csum_com_btl_t),
|
||||
mca_pml_csum_com_btl_comp );
|
||||
|
||||
for(length_left = size, i = 0; i < num_btls; i++) {
|
||||
mca_bml_base_btl_t* bml_btl = btls[i].bml_btl;
|
||||
size_t length = 0;
|
||||
if( OPAL_UNLIKELY(0 != length_left) ) {
|
||||
length = (length_left > bml_btl->btl->btl_eager_limit)?
|
||||
((size_t)(size * (bml_btl->btl_weight / weight_total))) :
|
||||
length_left;
|
||||
|
||||
if(length > length_left)
|
||||
length = length_left;
|
||||
length_left -= length;
|
||||
}
|
||||
btls[i].length = length;
|
||||
}
|
||||
|
||||
/* account for rounding errors */
|
||||
btls[0].length += length_left;
|
||||
}
|
||||
|
||||
#endif
|
@ -1,98 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <string.h>
|
||||
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_comm.h"
|
||||
|
||||
|
||||
|
||||
static void mca_pml_csum_comm_proc_construct(mca_pml_csum_comm_proc_t* proc)
|
||||
{
|
||||
proc->expected_sequence = 1;
|
||||
proc->ompi_proc = NULL;
|
||||
proc->send_sequence = 0;
|
||||
OBJ_CONSTRUCT(&proc->frags_cant_match, opal_list_t);
|
||||
OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t);
|
||||
OBJ_CONSTRUCT(&proc->unexpected_frags, opal_list_t);
|
||||
}
|
||||
|
||||
|
||||
static void mca_pml_csum_comm_proc_destruct(mca_pml_csum_comm_proc_t* proc)
|
||||
{
|
||||
OBJ_DESTRUCT(&proc->frags_cant_match);
|
||||
OBJ_DESTRUCT(&proc->specific_receives);
|
||||
OBJ_DESTRUCT(&proc->unexpected_frags);
|
||||
}
|
||||
|
||||
|
||||
static OBJ_CLASS_INSTANCE(
|
||||
mca_pml_csum_comm_proc_t,
|
||||
opal_object_t,
|
||||
mca_pml_csum_comm_proc_construct,
|
||||
mca_pml_csum_comm_proc_destruct);
|
||||
|
||||
|
||||
static void mca_pml_csum_comm_construct(mca_pml_csum_comm_t* comm)
|
||||
{
|
||||
OBJ_CONSTRUCT(&comm->wild_receives, opal_list_t);
|
||||
OBJ_CONSTRUCT(&comm->matching_lock, opal_mutex_t);
|
||||
comm->recv_sequence = 0;
|
||||
comm->procs = NULL;
|
||||
comm->num_procs = 0;
|
||||
}
|
||||
|
||||
|
||||
static void mca_pml_csum_comm_destruct(mca_pml_csum_comm_t* comm)
|
||||
{
|
||||
size_t i;
|
||||
for(i=0; i<comm->num_procs; i++)
|
||||
OBJ_DESTRUCT((&comm->procs[i]));
|
||||
if(NULL != comm->procs)
|
||||
free(comm->procs);
|
||||
OBJ_DESTRUCT(&comm->wild_receives);
|
||||
OBJ_DESTRUCT(&comm->matching_lock);
|
||||
}
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_pml_csum_comm_t,
|
||||
opal_object_t,
|
||||
mca_pml_csum_comm_construct,
|
||||
mca_pml_csum_comm_destruct);
|
||||
|
||||
|
||||
int mca_pml_csum_comm_init_size(mca_pml_csum_comm_t* comm, size_t size)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
/* send message sequence-number support - sender side */
|
||||
comm->procs = (mca_pml_csum_comm_proc_t*)malloc(sizeof(mca_pml_csum_comm_proc_t)*size);
|
||||
if(NULL == comm->procs) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for(i=0; i<size; i++) {
|
||||
OBJ_CONSTRUCT(comm->procs+i, mca_pml_csum_comm_proc_t);
|
||||
}
|
||||
comm->num_procs = size;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,79 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_PML_OB1_COMM_H
|
||||
#define MCA_PML_OB1_COMM_H
|
||||
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
||||
struct mca_pml_csum_comm_proc_t {
|
||||
opal_object_t super;
|
||||
uint16_t expected_sequence; /**< send message sequence number - receiver side */
|
||||
struct ompi_proc_t* ompi_proc;
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
volatile int32_t send_sequence; /**< send side sequence number */
|
||||
#else
|
||||
int32_t send_sequence; /**< send side sequence number */
|
||||
#endif
|
||||
opal_list_t frags_cant_match; /**< out-of-order fragment queues */
|
||||
opal_list_t specific_receives; /**< queues of unmatched specific receives */
|
||||
opal_list_t unexpected_frags; /**< unexpected fragment queues */
|
||||
};
|
||||
typedef struct mca_pml_csum_comm_proc_t mca_pml_csum_comm_proc_t;
|
||||
|
||||
|
||||
/**
|
||||
* Cached on ompi_communicator_t to hold queues/state
|
||||
* used by the PML<->PTL interface for matching logic.
|
||||
*/
|
||||
struct mca_pml_comm_t {
|
||||
opal_object_t super;
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
volatile uint32_t recv_sequence; /**< recv request sequence number - receiver side */
|
||||
#else
|
||||
uint32_t recv_sequence; /**< recv request sequence number - receiver side */
|
||||
#endif
|
||||
opal_mutex_t matching_lock; /**< matching lock */
|
||||
opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */
|
||||
mca_pml_csum_comm_proc_t* procs;
|
||||
size_t num_procs;
|
||||
};
|
||||
typedef struct mca_pml_comm_t mca_pml_csum_comm_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_csum_comm_t);
|
||||
|
||||
|
||||
/**
|
||||
* Initialize an instance of mca_pml_csum_comm_t based on the communicator size.
|
||||
*
|
||||
* @param comm Instance of mca_pml_csum_comm_t
|
||||
* @param size Size of communicator
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*/
|
||||
|
||||
extern int mca_pml_csum_comm_init_size(mca_pml_csum_comm_t* comm, size_t size);
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
||||
|
@ -1,254 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "mpi.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "ompi/mca/pml/base/pml_base_bsend.h"
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_hdr.h"
|
||||
#include "pml_csum_sendreq.h"
|
||||
#include "pml_csum_recvreq.h"
|
||||
#include "pml_csum_rdmafrag.h"
|
||||
#include "pml_csum_recvfrag.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "pml_csum_component.h"
|
||||
#include "ompi/mca/allocator/base/base.h"
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_pml_csum_pckt_pending_t,
|
||||
ompi_free_list_item_t,
|
||||
NULL,
|
||||
NULL );
|
||||
|
||||
static int mca_pml_csum_component_open(void);
|
||||
static int mca_pml_csum_component_close(void);
|
||||
static mca_pml_base_module_t*
|
||||
mca_pml_csum_component_init( int* priority, bool enable_progress_threads,
|
||||
bool enable_mpi_threads );
|
||||
static int mca_pml_csum_component_fini(void);
|
||||
int mca_pml_csum_output = 0;
|
||||
|
||||
mca_pml_base_component_2_0_0_t mca_pml_csum_component = {
|
||||
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
MCA_PML_BASE_VERSION_2_0_0,
|
||||
|
||||
"csum", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
mca_pml_csum_component_open, /* component open */
|
||||
mca_pml_csum_component_close /* component close */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
mca_pml_csum_component_init, /* component init */
|
||||
mca_pml_csum_component_fini /* component finalize */
|
||||
|
||||
};
|
||||
|
||||
void *mca_pml_csum_seg_alloc( struct mca_mpool_base_module_t* mpool,
|
||||
size_t* size,
|
||||
mca_mpool_base_registration_t** registration);
|
||||
|
||||
void mca_pml_csum_seg_free( struct mca_mpool_base_module_t* mpool,
|
||||
void* segment );
|
||||
|
||||
static inline int mca_pml_csum_param_register_int(
|
||||
const char* param_name,
|
||||
int default_value)
|
||||
{
|
||||
int param_value = default_value;
|
||||
|
||||
(void) mca_base_param_reg_int (&mca_pml_csum_component.pmlm_version, param_name,
|
||||
NULL, false, false, default_value, ¶m_value);
|
||||
|
||||
return param_value;
|
||||
}
|
||||
|
||||
static int mca_pml_csum_component_open(void)
|
||||
{
|
||||
int value;
|
||||
mca_allocator_base_component_t* allocator_component;
|
||||
|
||||
value = mca_pml_csum_param_register_int("verbose", 0);
|
||||
mca_pml_csum_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_pml_csum_output, value);
|
||||
|
||||
mca_pml_csum.free_list_num =
|
||||
mca_pml_csum_param_register_int("free_list_num", 4);
|
||||
mca_pml_csum.free_list_max =
|
||||
mca_pml_csum_param_register_int("free_list_max", -1);
|
||||
mca_pml_csum.free_list_inc =
|
||||
mca_pml_csum_param_register_int("free_list_inc", 64);
|
||||
mca_pml_csum.priority =
|
||||
mca_pml_csum_param_register_int("priority", 0);
|
||||
mca_pml_csum.send_pipeline_depth =
|
||||
mca_pml_csum_param_register_int("send_pipeline_depth", 3);
|
||||
mca_pml_csum.recv_pipeline_depth =
|
||||
mca_pml_csum_param_register_int("recv_pipeline_depth", 4);
|
||||
mca_pml_csum.rdma_put_retries_limit =
|
||||
mca_pml_csum_param_register_int("rdma_put_retries_limit", 5);
|
||||
mca_pml_csum.max_rdma_per_request =
|
||||
mca_pml_csum_param_register_int("max_rdma_per_request", 4);
|
||||
mca_pml_csum.max_send_per_range =
|
||||
mca_pml_csum_param_register_int("max_send_per_range", 4);
|
||||
|
||||
mca_pml_csum.unexpected_limit =
|
||||
mca_pml_csum_param_register_int("unexpected_limit", 128);
|
||||
|
||||
mca_base_param_reg_string(&mca_pml_csum_component.pmlm_version,
|
||||
"allocator",
|
||||
"Name of allocator component for unexpected messages",
|
||||
false, false,
|
||||
"bucket",
|
||||
&mca_pml_csum.allocator_name);
|
||||
|
||||
allocator_component = mca_allocator_component_lookup( mca_pml_csum.allocator_name );
|
||||
if(NULL == allocator_component) {
|
||||
opal_output(0, "mca_pml_csum_component_open: can't find allocator: %s\n", mca_pml_csum.allocator_name);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
mca_pml_csum.allocator = allocator_component->allocator_init(true,
|
||||
mca_pml_csum_seg_alloc,
|
||||
mca_pml_csum_seg_free, NULL);
|
||||
if(NULL == mca_pml_csum.allocator) {
|
||||
opal_output(0, "mca_pml_csum_component_open: unable to initialize allocator\n");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
mca_pml_csum.enabled = false;
|
||||
return mca_bml_base_open();
|
||||
}
|
||||
|
||||
|
||||
static int mca_pml_csum_component_close(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (OMPI_SUCCESS != (rc = mca_bml_base_close())) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL != mca_pml_csum.allocator_name) {
|
||||
free(mca_pml_csum.allocator_name);
|
||||
}
|
||||
opal_output_close(mca_pml_csum_output);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static mca_pml_base_module_t*
|
||||
mca_pml_csum_component_init( int* priority,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads )
|
||||
{
|
||||
opal_output_verbose( 10, mca_pml_csum_output,
|
||||
"in csum, my priority is %d\n", mca_pml_csum.priority);
|
||||
|
||||
if((*priority) > mca_pml_csum.priority) {
|
||||
*priority = mca_pml_csum.priority;
|
||||
return NULL;
|
||||
}
|
||||
*priority = mca_pml_csum.priority;
|
||||
|
||||
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads,
|
||||
enable_mpi_threads)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Set this here (vs in component_open()) because
|
||||
ompi_mpi_leave_pinned* may have been set after MCA params were
|
||||
read (e.g., by the openib btl) */
|
||||
mca_pml_csum.leave_pinned = (1 == ompi_mpi_leave_pinned);
|
||||
mca_pml_csum.leave_pinned_pipeline = (int) ompi_mpi_leave_pinned_pipeline;
|
||||
|
||||
return &mca_pml_csum.super;
|
||||
}
|
||||
|
||||
int mca_pml_csum_component_fini(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* Shutdown BML */
|
||||
if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize()))
|
||||
return rc;
|
||||
|
||||
if(!mca_pml_csum.enabled)
|
||||
return OMPI_SUCCESS; /* never selected.. return success.. */
|
||||
mca_pml_csum.enabled = false; /* not anymore */
|
||||
|
||||
OBJ_DESTRUCT(&mca_pml_csum.rdma_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.pckt_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.recv_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.send_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.non_existing_communicator_pending);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.buffers);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.pending_pckts);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.recv_frags);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.rdma_frags);
|
||||
OBJ_DESTRUCT(&mca_pml_csum.lock);
|
||||
|
||||
if(OMPI_SUCCESS != (rc = mca_pml_csum.allocator->alc_finalize(mca_pml_csum.allocator))) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (mca_pml_base_send_requests.fl_num_allocated !=
|
||||
mca_pml_base_send_requests.super.opal_list_length) {
|
||||
opal_output(0, "csum send requests: %d allocated %d returned\n",
|
||||
mca_pml_base_send_requests.fl_num_allocated,
|
||||
mca_pml_base_send_requests.super.opal_list_length);
|
||||
}
|
||||
if (mca_pml_base_recv_requests.fl_num_allocated !=
|
||||
mca_pml_base_recv_requests.super.opal_list_length) {
|
||||
opal_output(0, "csum recv requests: %d allocated %d returned\n",
|
||||
mca_pml_base_recv_requests.fl_num_allocated,
|
||||
mca_pml_base_recv_requests.super.opal_list_length);
|
||||
}
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
void *mca_pml_csum_seg_alloc( struct mca_mpool_base_module_t* mpool,
|
||||
size_t* size,
|
||||
mca_mpool_base_registration_t** registration) {
|
||||
return malloc(*size);
|
||||
}
|
||||
|
||||
void mca_pml_csum_seg_free( struct mca_mpool_base_module_t* mpool,
|
||||
void* segment ) {
|
||||
free(segment);
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_CSUM_COMPONENT_H
|
||||
#define MCA_PML_CSUM_COMPONENT_H
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* PML module functions.
|
||||
*/
|
||||
OMPI_MODULE_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_csum_component;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,25 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "pml_csum_endpoint.h"
|
||||
|
||||
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_PML_CSUM_ENDPOINT_H
|
||||
#define MCA_PML_CSUM_ENDPOINT_H
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
||||
|
@ -1,393 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_PML_CSUM_HEADER_H
|
||||
#define MCA_PML_CSUM_HEADER_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
|
||||
#include "opal/types.h"
|
||||
#include "opal/util/arch.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
#define MCA_PML_CSUM_HDR_TYPE_MATCH (MCA_BTL_TAG_PML + 1)
|
||||
#define MCA_PML_CSUM_HDR_TYPE_RNDV (MCA_BTL_TAG_PML + 2)
|
||||
#define MCA_PML_CSUM_HDR_TYPE_RGET (MCA_BTL_TAG_PML + 3)
|
||||
#define MCA_PML_CSUM_HDR_TYPE_ACK (MCA_BTL_TAG_PML + 4)
|
||||
#define MCA_PML_CSUM_HDR_TYPE_NACK (MCA_BTL_TAG_PML + 5)
|
||||
#define MCA_PML_CSUM_HDR_TYPE_FRAG (MCA_BTL_TAG_PML + 6)
|
||||
#define MCA_PML_CSUM_HDR_TYPE_GET (MCA_BTL_TAG_PML + 7)
|
||||
#define MCA_PML_CSUM_HDR_TYPE_PUT (MCA_BTL_TAG_PML + 8)
|
||||
#define MCA_PML_CSUM_HDR_TYPE_FIN (MCA_BTL_TAG_PML + 9)
|
||||
|
||||
#define MCA_PML_CSUM_HDR_FLAGS_ACK 1 /* is an ack required */
|
||||
#define MCA_PML_CSUM_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
|
||||
#define MCA_PML_CSUM_HDR_FLAGS_PIN 4 /* is user buffer pinned */
|
||||
#define MCA_PML_CSUM_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */
|
||||
#define MCA_PML_CSUM_HDR_FLAGS_NORDMA 16 /* rest will be send by copy-in-out */
|
||||
|
||||
/**
|
||||
* Common hdr attributes - must be first element in each hdr type
|
||||
*/
|
||||
struct mca_pml_csum_common_hdr_t {
|
||||
uint8_t hdr_type; /**< type of envelope */
|
||||
uint8_t hdr_flags; /**< flags indicating how fragment should be processed */
|
||||
uint16_t hdr_csum; /**< checksum over header */
|
||||
};
|
||||
typedef struct mca_pml_csum_common_hdr_t mca_pml_csum_common_hdr_t;
|
||||
|
||||
#define MCA_PML_CSUM_COMMON_HDR_NTOH(h) (h).hdr_csum = ntohs((h).hdr_csum);
|
||||
#define MCA_PML_CSUM_COMMON_HDR_HTON(h) (h).hdr_csum = htons((h).hdr_csum);
|
||||
|
||||
/**
|
||||
* Header definition for the first fragment, contains the
|
||||
* attributes required to match the corresponding posted receive.
|
||||
*/
|
||||
struct mca_pml_csum_match_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint16_t hdr_ctx; /**< communicator index */
|
||||
uint16_t hdr_seq; /**< message sequence number */
|
||||
int32_t hdr_src; /**< source rank */
|
||||
int32_t hdr_tag; /**< user tag */
|
||||
uint32_t hdr_csum; /**< checksum over data */
|
||||
};
|
||||
#define OMPI_PML_CSUM_MATCH_HDR_LEN 20
|
||||
|
||||
typedef struct mca_pml_csum_match_hdr_t mca_pml_csum_match_hdr_t;
|
||||
|
||||
#define MCA_PML_CSUM_MATCH_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_ctx = ntohs((h).hdr_ctx); \
|
||||
(h).hdr_src = ntohl((h).hdr_src); \
|
||||
(h).hdr_tag = ntohl((h).hdr_tag); \
|
||||
(h).hdr_seq = ntohs((h).hdr_seq); \
|
||||
(h).hdr_csum = ntohl((h).hdr_csum); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_CSUM_MATCH_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
|
||||
(h).hdr_ctx = htons((h).hdr_ctx); \
|
||||
(h).hdr_src = htonl((h).hdr_src); \
|
||||
(h).hdr_tag = htonl((h).hdr_tag); \
|
||||
(h).hdr_seq = htons((h).hdr_seq); \
|
||||
(h).hdr_csum = htonl((h).hdr_csum); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header definition for the first fragment when an acknowledgment
|
||||
* is required. This could be the first fragment of a large message
|
||||
* or a short message that requires an ack (synchronous).
|
||||
*/
|
||||
struct mca_pml_csum_rendezvous_hdr_t {
|
||||
mca_pml_csum_match_hdr_t hdr_match;
|
||||
uint64_t hdr_msg_length; /**< message length */
|
||||
ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */
|
||||
};
|
||||
typedef struct mca_pml_csum_rendezvous_hdr_t mca_pml_csum_rendezvous_hdr_t;
|
||||
|
||||
/* Note that hdr_src_req is not put in network byte order because it
|
||||
is never processed by the receiver, other than being copied into
|
||||
the ack header */
|
||||
#define MCA_PML_CSUM_RNDV_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_MATCH_HDR_NTOH((h).hdr_match); \
|
||||
(h).hdr_msg_length = ntoh64((h).hdr_msg_length); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_CSUM_RNDV_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_MATCH_HDR_HTON((h).hdr_match); \
|
||||
(h).hdr_msg_length = hton64((h).hdr_msg_length); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header definition for a combined rdma rendezvous/get
|
||||
*/
|
||||
struct mca_pml_csum_rget_hdr_t {
|
||||
mca_pml_csum_rendezvous_hdr_t hdr_rndv;
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[4];
|
||||
#endif
|
||||
ompi_ptr_t hdr_des; /**< source descriptor */
|
||||
};
|
||||
typedef struct mca_pml_csum_rget_hdr_t mca_pml_csum_rget_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_CSUM_RGET_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_CSUM_RGET_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
#define MCA_PML_CSUM_RGET_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_RNDV_HDR_NTOH((h).hdr_rndv); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_CSUM_RGET_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_RNDV_HDR_HTON((h).hdr_rndv); \
|
||||
MCA_PML_CSUM_RGET_HDR_FILL(h); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header for subsequent fragments.
|
||||
*/
|
||||
struct mca_pml_csum_frag_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint32_t hdr_csum;
|
||||
uint64_t hdr_frag_offset; /**< offset into message */
|
||||
ompi_ptr_t hdr_src_req; /**< pointer to source request */
|
||||
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
|
||||
};
|
||||
typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t;
|
||||
|
||||
#define MCA_PML_CSUM_FRAG_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_csum = ntohl((h).hdr_csum); \
|
||||
(h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_CSUM_FRAG_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
|
||||
(h).hdr_csum = htonl((h).hdr_csum); \
|
||||
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header used to acknowledgment outstanding fragment(s).
|
||||
*/
|
||||
|
||||
struct mca_pml_csum_ack_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[4];
|
||||
#endif
|
||||
ompi_ptr_t hdr_src_req; /**< source request */
|
||||
ompi_ptr_t hdr_dst_req; /**< matched receive request */
|
||||
uint64_t hdr_send_offset; /**< starting point of copy in/out */
|
||||
};
|
||||
typedef struct mca_pml_csum_ack_hdr_t mca_pml_csum_ack_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_CSUM_ACK_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
} while (0)
|
||||
#else
|
||||
#define MCA_PML_CSUM_ACK_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
|
||||
/* Note that the request headers are not put in NBO because the
|
||||
src_req is already in receiver's byte order and the dst_req is not
|
||||
used by the receiver for anything other than backpointers in return
|
||||
headers */
|
||||
#define MCA_PML_CSUM_ACK_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_send_offset = ntoh64((h).hdr_send_offset); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_CSUM_ACK_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_CSUM_ACK_HDR_FILL(h); \
|
||||
(h).hdr_send_offset = hton64((h).hdr_send_offset); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header used to initiate an RDMA operation.
|
||||
*/
|
||||
|
||||
struct mca_pml_csum_rdma_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
ompi_ptr_t hdr_req; /**< destination request */
|
||||
ompi_ptr_t hdr_des; /**< source descriptor */
|
||||
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
||||
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
||||
};
|
||||
typedef struct mca_pml_csum_rdma_hdr_t mca_pml_csum_rdma_hdr_t;
|
||||
|
||||
#define MCA_PML_CSUM_RDMA_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
(h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_CSUM_RDMA_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
(h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Header used to complete an RDMA operation.
|
||||
*/
|
||||
|
||||
struct mca_pml_csum_fin_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint32_t hdr_csum;
|
||||
ompi_ptr_t hdr_des; /**< completed descriptor */
|
||||
uint32_t hdr_fail; /**< RDMA operation failed */
|
||||
};
|
||||
typedef struct mca_pml_csum_fin_hdr_t mca_pml_csum_fin_hdr_t;
|
||||
|
||||
#define MCA_PML_CSUM_FIN_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_csum = ntohl((h).hdr_csum); \
|
||||
(h).hdr_fail = ntohl((h).hdr_fail); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_CSUM_FIN_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
|
||||
(h).hdr_csum = htonl((h).hdr_csum); \
|
||||
(h).hdr_fail = htonl((h).hdr_fail); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Union of defined hdr types.
|
||||
*/
|
||||
union mca_pml_csum_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common;
|
||||
mca_pml_csum_match_hdr_t hdr_match;
|
||||
mca_pml_csum_rendezvous_hdr_t hdr_rndv;
|
||||
mca_pml_csum_rget_hdr_t hdr_rget;
|
||||
mca_pml_csum_frag_hdr_t hdr_frag;
|
||||
mca_pml_csum_ack_hdr_t hdr_ack;
|
||||
mca_pml_csum_rdma_hdr_t hdr_rdma;
|
||||
mca_pml_csum_fin_hdr_t hdr_fin;
|
||||
};
|
||||
typedef union mca_pml_csum_hdr_t mca_pml_csum_hdr_t;
|
||||
|
||||
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
static inline __opal_attribute_always_inline__ void
|
||||
csum_hdr_ntoh(mca_pml_csum_hdr_t *hdr, const uint8_t hdr_type)
|
||||
{
|
||||
if(!(hdr->hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NBO))
|
||||
return;
|
||||
|
||||
switch(hdr_type) {
|
||||
case MCA_PML_CSUM_HDR_TYPE_MATCH:
|
||||
MCA_PML_CSUM_MATCH_HDR_NTOH(hdr->hdr_match);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_RNDV:
|
||||
MCA_PML_CSUM_RNDV_HDR_NTOH(hdr->hdr_rndv);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_RGET:
|
||||
MCA_PML_CSUM_RGET_HDR_NTOH(hdr->hdr_rget);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_ACK:
|
||||
MCA_PML_CSUM_ACK_HDR_NTOH(hdr->hdr_ack);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_FRAG:
|
||||
MCA_PML_CSUM_FRAG_HDR_NTOH(hdr->hdr_frag);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_PUT:
|
||||
MCA_PML_CSUM_RDMA_HDR_NTOH(hdr->hdr_rdma);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_FIN:
|
||||
MCA_PML_CSUM_FIN_HDR_NTOH(hdr->hdr_fin);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
#define csum_hdr_ntoh(h, t) do{}while(0)
|
||||
#endif
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
#define csum_hdr_hton(h, t, p) \
|
||||
csum_hdr_hton_intr((mca_pml_csum_hdr_t*)h, t, p)
|
||||
static inline __opal_attribute_always_inline__ void
|
||||
csum_hdr_hton_intr(mca_pml_csum_hdr_t *hdr, const uint8_t hdr_type,
|
||||
const ompi_proc_t *proc)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
hdr->hdr_common.hdr_flags |= MCA_PML_CSUM_HDR_FLAGS_NBO;
|
||||
#else
|
||||
|
||||
if(!(proc->proc_arch & OPAL_ARCH_ISBIGENDIAN))
|
||||
return;
|
||||
|
||||
hdr->hdr_common.hdr_flags |= MCA_PML_CSUM_HDR_FLAGS_NBO;
|
||||
switch(hdr_type) {
|
||||
case MCA_PML_CSUM_HDR_TYPE_MATCH:
|
||||
MCA_PML_CSUM_MATCH_HDR_HTON(hdr->hdr_match);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_RNDV:
|
||||
MCA_PML_CSUM_RNDV_HDR_HTON(hdr->hdr_rndv);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_RGET:
|
||||
MCA_PML_CSUM_RGET_HDR_HTON(hdr->hdr_rget);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_ACK:
|
||||
MCA_PML_CSUM_ACK_HDR_HTON(hdr->hdr_ack);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_FRAG:
|
||||
MCA_PML_CSUM_FRAG_HDR_HTON(hdr->hdr_frag);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_PUT:
|
||||
MCA_PML_CSUM_RDMA_HDR_HTON(hdr->hdr_rdma);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_FIN:
|
||||
MCA_PML_CSUM_FIN_HDR_HTON(hdr->hdr_fin);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#define csum_hdr_hton(h, t, p) do{}while(0)
|
||||
#endif
|
||||
#endif
|
@ -1,98 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "pml_csum_recvreq.h"
|
||||
|
||||
|
||||
int mca_pml_csum_iprobe(int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
int *matched, ompi_status_public_t * status)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
mca_pml_csum_recv_request_t recvreq;
|
||||
|
||||
OBJ_CONSTRUCT( &recvreq, mca_pml_csum_recv_request_t );
|
||||
recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML;
|
||||
recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE;
|
||||
|
||||
MCA_PML_CSUM_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true);
|
||||
MCA_PML_CSUM_RECV_REQUEST_START(&recvreq);
|
||||
|
||||
if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) {
|
||||
if( NULL != status ) {
|
||||
*status = recvreq.req_recv.req_base.req_ompi.req_status;
|
||||
}
|
||||
*matched = 1;
|
||||
} else {
|
||||
*matched = 0;
|
||||
opal_progress();
|
||||
}
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
int mca_pml_csum_probe(int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
ompi_status_public_t * status)
|
||||
{
|
||||
mca_pml_csum_recv_request_t recvreq;
|
||||
|
||||
OBJ_CONSTRUCT( &recvreq, mca_pml_csum_recv_request_t );
|
||||
recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML;
|
||||
recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE;
|
||||
|
||||
MCA_PML_CSUM_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true);
|
||||
MCA_PML_CSUM_RECV_REQUEST_START(&recvreq);
|
||||
|
||||
ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi);
|
||||
|
||||
if (NULL != status) {
|
||||
*status = recvreq.req_recv.req_base.req_ompi.req_status;
|
||||
}
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pml_csum_improbe(int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
int *matched,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status)
|
||||
{
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pml_csum_mprobe(int dst,
|
||||
int tag,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status)
|
||||
{
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
@ -1,135 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "pml_csum_recvreq.h"
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
|
||||
int mca_pml_csum_irecv_init(void *addr,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
struct ompi_request_t **request)
|
||||
{
|
||||
int rc;
|
||||
mca_pml_csum_recv_request_t *recvreq;
|
||||
MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc);
|
||||
if (NULL == recvreq)
|
||||
return rc;
|
||||
|
||||
MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq,
|
||||
addr,
|
||||
count, datatype, src, tag, comm, true);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&((recvreq)->req_recv.req_base),
|
||||
PERUSE_RECV);
|
||||
|
||||
*request = (ompi_request_t *) recvreq;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_pml_csum_irecv(void *addr,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
struct ompi_request_t **request)
|
||||
{
|
||||
int rc;
|
||||
|
||||
mca_pml_csum_recv_request_t *recvreq;
|
||||
MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc);
|
||||
if (NULL == recvreq)
|
||||
return rc;
|
||||
|
||||
MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq,
|
||||
addr,
|
||||
count, datatype, src, tag, comm, false);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&((recvreq)->req_recv.req_base),
|
||||
PERUSE_RECV);
|
||||
|
||||
MCA_PML_CSUM_RECV_REQUEST_START(recvreq);
|
||||
*request = (ompi_request_t *) recvreq;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int mca_pml_csum_recv(void *addr,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int src,
|
||||
int tag,
|
||||
struct ompi_communicator_t *comm,
|
||||
ompi_status_public_t * status)
|
||||
{
|
||||
int rc;
|
||||
mca_pml_csum_recv_request_t *recvreq;
|
||||
MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc);
|
||||
if (NULL == recvreq)
|
||||
return rc;
|
||||
|
||||
MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq,
|
||||
addr,
|
||||
count, datatype, src, tag, comm, false);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&((recvreq)->req_recv.req_base),
|
||||
PERUSE_RECV);
|
||||
|
||||
MCA_PML_CSUM_RECV_REQUEST_START(recvreq);
|
||||
ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi);
|
||||
|
||||
if (NULL != status) { /* return status */
|
||||
*status = recvreq->req_recv.req_base.req_ompi.req_status;
|
||||
}
|
||||
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
ompi_request_free( (ompi_request_t**)&recvreq );
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pml_csum_imrecv(void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
struct ompi_message_t **message,
|
||||
struct ompi_request_t **request)
|
||||
{
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_pml_csum_mrecv(void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t *datatype,
|
||||
struct ompi_message_t **message,
|
||||
ompi_status_public_t* status)
|
||||
{
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
@ -1,130 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_sendreq.h"
|
||||
#include "pml_csum_recvreq.h"
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
|
||||
int mca_pml_csum_isend_init(void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t sendmode,
|
||||
ompi_communicator_t * comm,
|
||||
ompi_request_t ** request)
|
||||
{
|
||||
int rc;
|
||||
|
||||
mca_pml_csum_send_request_t *sendreq = NULL;
|
||||
MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
|
||||
if (rc != OMPI_SUCCESS)
|
||||
return rc;
|
||||
|
||||
MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
dst, tag,
|
||||
comm, sendmode, true);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&(sendreq)->req_send.req_base,
|
||||
PERUSE_SEND);
|
||||
|
||||
*request = (ompi_request_t *) sendreq;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int mca_pml_csum_isend(void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t sendmode,
|
||||
ompi_communicator_t * comm,
|
||||
ompi_request_t ** request)
|
||||
{
|
||||
int rc;
|
||||
mca_pml_csum_send_request_t *sendreq = NULL;
|
||||
|
||||
MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
|
||||
if (rc != OMPI_SUCCESS)
|
||||
return rc;
|
||||
|
||||
MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
dst, tag,
|
||||
comm, sendmode, false);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&(sendreq)->req_send.req_base,
|
||||
PERUSE_SEND);
|
||||
|
||||
MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc);
|
||||
*request = (ompi_request_t *) sendreq;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
int mca_pml_csum_send(void *buf,
|
||||
size_t count,
|
||||
ompi_datatype_t * datatype,
|
||||
int dst,
|
||||
int tag,
|
||||
mca_pml_base_send_mode_t sendmode,
|
||||
ompi_communicator_t * comm)
|
||||
{
|
||||
int rc;
|
||||
mca_pml_csum_send_request_t *sendreq;
|
||||
|
||||
MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
|
||||
if (rc != OMPI_SUCCESS)
|
||||
return rc;
|
||||
|
||||
MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq,
|
||||
buf,
|
||||
count,
|
||||
datatype,
|
||||
dst, tag,
|
||||
comm, sendmode, false);
|
||||
|
||||
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
|
||||
&(sendreq)->req_send.req_base,
|
||||
PERUSE_SEND);
|
||||
|
||||
MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
MCA_PML_CSUM_SEND_REQUEST_RETURN( sendreq );
|
||||
return rc;
|
||||
}
|
||||
|
||||
ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi);
|
||||
|
||||
rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
|
||||
ompi_request_free( (ompi_request_t**)&sendreq );
|
||||
return rc;
|
||||
}
|
@ -1,77 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_sendreq.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
|
||||
int mca_pml_csum_progress(void)
|
||||
{
|
||||
int i, queue_length = opal_list_get_size(&mca_pml_csum.send_pending);
|
||||
int j, completed_requests = 0;
|
||||
bool send_succedded;
|
||||
|
||||
if( OPAL_LIKELY(0 == queue_length) )
|
||||
return 0;
|
||||
|
||||
for( i = 0; i < queue_length; i++ ) {
|
||||
mca_pml_csum_send_pending_t pending_type = MCA_PML_CSUM_SEND_PENDING_NONE;
|
||||
mca_pml_csum_send_request_t* sendreq;
|
||||
mca_bml_base_endpoint_t* endpoint;
|
||||
|
||||
sendreq = get_request_from_send_pending(&pending_type);
|
||||
if(OPAL_UNLIKELY(NULL == sendreq))
|
||||
break;
|
||||
|
||||
switch(pending_type) {
|
||||
case MCA_PML_CSUM_SEND_PENDING_NONE:
|
||||
assert(0);
|
||||
return 0;
|
||||
case MCA_PML_CSUM_SEND_PENDING_SCHEDULE:
|
||||
if( mca_pml_csum_send_request_schedule_exclusive(sendreq) ==
|
||||
OMPI_ERR_OUT_OF_RESOURCE ) {
|
||||
return 0;
|
||||
}
|
||||
completed_requests++;
|
||||
break;
|
||||
case MCA_PML_CSUM_SEND_PENDING_START:
|
||||
endpoint = sendreq->req_endpoint;
|
||||
send_succedded = false;
|
||||
for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) {
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
int rc;
|
||||
|
||||
/* select a btl */
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
rc = mca_pml_csum_send_request_start_btl(sendreq, bml_btl);
|
||||
if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) {
|
||||
send_succedded = true;
|
||||
completed_requests++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( false == send_succedded ) {
|
||||
add_request_to_send_pending(sendreq, MCA_PML_CSUM_SEND_PENDING_START, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
return completed_requests;
|
||||
}
|
||||
|
@ -1,118 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_rdma.h"
|
||||
|
||||
/* Use this registration if no registration needed for a BTL instead of NULL.
|
||||
* This will help other code to distinguish case when memory is not registered
|
||||
* from case when registration is not needed */
|
||||
static mca_mpool_base_registration_t pml_csum_dummy_reg;
|
||||
|
||||
/*
|
||||
* Check to see if memory is registered or can be registered. Build a
|
||||
* set of registrations on the request.
|
||||
*/
|
||||
|
||||
size_t mca_pml_csum_rdma_btls(
|
||||
mca_bml_base_endpoint_t* bml_endpoint,
|
||||
unsigned char* base,
|
||||
size_t size,
|
||||
mca_pml_csum_com_btl_t* rdma_btls)
|
||||
{
|
||||
int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
|
||||
double weight_total = 0;
|
||||
int num_btls_used = 0, n;
|
||||
|
||||
/* shortcut when there are no rdma capable btls */
|
||||
if(num_btls == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check to see if memory is registered */
|
||||
for(n = 0; n < num_btls && num_btls_used < mca_pml_csum.max_rdma_per_request;
|
||||
n++) {
|
||||
mca_bml_base_btl_t* bml_btl =
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
|
||||
(bml_endpoint->btl_rdma_index + n) % num_btls);
|
||||
mca_mpool_base_registration_t* reg = &pml_csum_dummy_reg;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;
|
||||
|
||||
if( NULL != btl_mpool ) {
|
||||
if(!mca_pml_csum.leave_pinned) {
|
||||
/* look through existing registrations */
|
||||
btl_mpool->mpool_find(btl_mpool, base, size, ®);
|
||||
} else {
|
||||
/* register the memory */
|
||||
btl_mpool->mpool_register(btl_mpool, base, size, 0, ®);
|
||||
}
|
||||
|
||||
if(NULL == reg)
|
||||
continue;
|
||||
}
|
||||
|
||||
rdma_btls[num_btls_used].bml_btl = bml_btl;
|
||||
rdma_btls[num_btls_used].btl_reg = reg;
|
||||
weight_total += bml_btl->btl_weight;
|
||||
num_btls_used++;
|
||||
}
|
||||
|
||||
/* if we don't use leave_pinned and all BTLs that already have this memory
|
||||
* registered amount to less then half of available bandwidth - fall back to
|
||||
* pipeline protocol */
|
||||
if(0 == num_btls_used || (!mca_pml_csum.leave_pinned && weight_total < 0.5))
|
||||
return 0;
|
||||
|
||||
mca_pml_csum_calc_weighted_length(rdma_btls, num_btls_used, size,
|
||||
weight_total);
|
||||
|
||||
bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % num_btls;
|
||||
return num_btls_used;
|
||||
}
|
||||
|
||||
size_t mca_pml_csum_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
|
||||
size_t size,
|
||||
mca_pml_csum_com_btl_t* rdma_btls )
|
||||
{
|
||||
int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
|
||||
double weight_total = 0;
|
||||
|
||||
for(i = 0; i < num_btls && i < mca_pml_csum.max_rdma_per_request; i++) {
|
||||
rdma_btls[i].bml_btl =
|
||||
mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
|
||||
if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool)
|
||||
rdma_btls[i].btl_reg = NULL;
|
||||
else
|
||||
rdma_btls[i].btl_reg = &pml_csum_dummy_reg;
|
||||
|
||||
weight_total += rdma_btls[i].bml_btl->btl_weight;
|
||||
}
|
||||
|
||||
mca_pml_csum_calc_weighted_length(rdma_btls, i, size, weight_total);
|
||||
|
||||
return i;
|
||||
}
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_CSUM_RDMA_H
#define MCA_PML_CSUM_RDMA_H

/* Forward declaration; the full definition lives in the BML headers. */
struct mca_bml_base_endpoint_t;

/*
 * Of the set of available btls that support RDMA,
 * find those that already have registrations - or
 * register if required (for leave_pinned option)
 *
 * @param endpoint  peer endpoint whose btl_rdma array is scanned
 * @param base      start address of the user buffer
 * @param size      length in bytes of the user buffer
 * @param btls      output array of BTL/registration pairs
 * @return number of entries written into btls (0 when RDMA is not worthwhile)
 */
size_t mca_pml_csum_rdma_btls(struct mca_bml_base_endpoint_t* endpoint,
    unsigned char* base, size_t size, struct mca_pml_csum_com_btl_t* btls);

/* Choose RDMA BTLs to use for sending of a request by pipeline protocol.
 * Calculate number of bytes to send through each BTL according to available
 * bandwidth
 *
 * @param endpoint   peer endpoint whose btl_rdma array is scanned
 * @param size       total number of bytes to split across the selected BTLs
 * @param rdma_btls  output array of BTL/length pairs
 * @return number of entries written into rdma_btls
 */
size_t mca_pml_csum_rdma_pipeline_btls(struct mca_bml_base_endpoint_t* endpoint,
    size_t size, mca_pml_csum_com_btl_t* rdma_btls);
#endif
|
||||
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_rdmafrag.h"
|
||||
|
||||
|
||||
/* Register mca_pml_csum_rdma_frag_t with the OBJ class system: it extends
 * ompi_free_list_item_t and needs no constructor/destructor hooks. */
OBJ_CLASS_INSTANCE(
    mca_pml_csum_rdma_frag_t,
    ompi_free_list_item_t,
    NULL,
    NULL);
@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_CSUM_RDMAFRAG_H
#define MCA_PML_CSUM_RDMAFRAG_H

#include "ompi/mca/btl/btl.h"
#include "pml_csum_hdr.h"

BEGIN_C_DECLS

/* Direction of the RDMA operation a fragment is driving. */
typedef enum {
    MCA_PML_CSUM_RDMA_PUT,
    MCA_PML_CSUM_RDMA_GET
} mca_pml_csum_rdma_state_t;

/*
 * Descriptor for one in-flight RDMA transfer.  Instances come from the
 * mca_pml_csum.rdma_frags free list via MCA_PML_CSUM_RDMA_FRAG_ALLOC below.
 */
struct mca_pml_csum_rdma_frag_t {
    ompi_free_list_item_t super;              /* free-list linkage; must stay first
                                                 (ALLOC macro casts item -> frag) */
    mca_bml_base_btl_t* rdma_bml;             /* BTL the transfer is issued on */
    mca_pml_csum_hdr_t rdma_hdr;              /* protocol header for this operation */
    mca_pml_csum_rdma_state_t rdma_state;     /* PUT or GET */
    size_t rdma_length;                       /* bytes to move */
    /* raw storage large enough for the maximum number of BTL segments */
    uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS];
    void *rdma_req;                           /* opaque back-pointer (owning request) */
    struct mca_bml_base_endpoint_t* rdma_ep;  /* peer endpoint */
    opal_convertor_t convertor;               /* datatype (un)pack state */
    mca_mpool_base_registration_t* reg;       /* memory registration, if any */
    uint32_t retries;                         /* retry count */
};
typedef struct mca_pml_csum_rdma_frag_t mca_pml_csum_rdma_frag_t;

OBJ_CLASS_DECLARATION(mca_pml_csum_rdma_frag_t);

/* Blocking allocation of a fragment from the global free list;
 * rc receives the free-list status. */
#define MCA_PML_CSUM_RDMA_FRAG_ALLOC(frag,rc)                   \
do {                                                            \
    ompi_free_list_item_t* item;                                \
    OMPI_FREE_LIST_WAIT(&mca_pml_csum.rdma_frags, item, rc);    \
    frag = (mca_pml_csum_rdma_frag_t*)item;                     \
} while(0)

/* Return a fragment to the global free list. */
#define MCA_PML_CSUM_RDMA_FRAG_RETURN(frag)                     \
do {                                                            \
    /* return fragment */                                       \
    OMPI_FREE_LIST_RETURN(&mca_pml_csum.rdma_frags,             \
        (ompi_free_list_item_t*)frag);                          \
} while(0)

END_C_DECLS

#endif
||||
|
@ -1,841 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/crc.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/prefetch.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/pml/base/base.h"
|
||||
#include "ompi/peruse/peruse-internal.h"
|
||||
#include "ompi/memchecker.h"
|
||||
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_comm.h"
|
||||
#include "pml_csum_recvfrag.h"
|
||||
#include "pml_csum_recvreq.h"
|
||||
#include "pml_csum_sendreq.h"
|
||||
#include "pml_csum_hdr.h"
|
||||
|
||||
/* Class registrations: both types need no constructor/destructor hooks.
 * Buffers are free-list items; receive fragments are plain list items. */
OBJ_CLASS_INSTANCE( mca_pml_csum_buffer_t,
                    ompi_free_list_item_t,
                    NULL,
                    NULL );

OBJ_CLASS_INSTANCE( mca_pml_csum_recv_frag_t,
                    opal_list_item_t,
                    NULL,
                    NULL );
|
||||
|
||||
/**
|
||||
* Static functions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Dump data elements that caused a checksum violation
|
||||
*/
|
||||
/**
 * Dump data elements that caused a checksum violation: hex-dump every
 * byte of every segment to stdout, 40 bytes per line, bracketed by
 * BEGIN/END marker lines.  Called just before aborting the job.
 */
static void dump_csum_error_data(mca_btl_base_segment_t* segments, size_t num_segments)
{
    size_t seg_idx;

    printf("CHECKSUM ERROR DATA\n");
    for (seg_idx = 0; seg_idx < num_segments; ++seg_idx) {
        const uint8_t *bytes = (uint8_t*)segments[seg_idx].seg_addr.pval;
        size_t off;

        printf("Segment %lu", (unsigned long)seg_idx);
        for (off = 0; off < segments[seg_idx].seg_len; off++) {
            /* wrap every 40 bytes; byte 0 always starts a fresh line */
            if (0 == (off % 40)) {
                printf("\n");
            }
            printf("%02x ", bytes[off]);
        }
    }
    printf("\nEND CHECKSUM ERROR DATA\n\n");
}
|
||||
|
||||
/**
|
||||
* Append a unexpected descriptor to a queue. This function will allocate and
|
||||
* initialize the fragment (if necessary) and then will add it to the specified
|
||||
* queue. The allocated fragment is not returned to the caller.
|
||||
*/
|
||||
static void
append_frag_to_list(opal_list_t *queue, mca_btl_base_module_t *btl,
                    mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
                    size_t num_segments, mca_pml_csum_recv_frag_t* frag)
{
    int rc;  /* status from the blocking free-list allocation macro */

    if(NULL == frag) {
        /* No caller-supplied fragment: allocate one and initialize it
         * from the received header and segments before queueing. */
        MCA_PML_CSUM_RECV_FRAG_ALLOC(frag, rc);
        MCA_PML_CSUM_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl);
    }
    opal_list_append(queue, (opal_list_item_t*)frag);
}
|
||||
|
||||
/**
|
||||
* Match incoming recv_frags against posted receives.
|
||||
* Supports out of order delivery.
|
||||
*
|
||||
* @param frag_header (IN) Header of received recv_frag.
|
||||
* @param frag_desc (IN) Received recv_frag descriptor.
|
||||
 * @param match_made (OUT) Flag indicating whether a match was made.
|
||||
* @param additional_matches (OUT) List of additional matches
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*/
|
||||
/* Generic matching entry point (defined below, after its helpers);
 * supports out-of-order fragment delivery. */
static int mca_pml_csum_recv_frag_match( mca_btl_base_module_t *btl,
                                         mca_pml_csum_match_hdr_t *hdr,
                                         mca_btl_base_segment_t* segments,
                                         size_t num_segments,
                                         int type);

/* Match a single fragment against the posted receives of (comm, proc);
 * if no receive matches, the fragment is queued on the unexpected list. */
static mca_pml_csum_recv_request_t*
match_one(mca_btl_base_module_t *btl,
          mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
          size_t num_segments, ompi_communicator_t *comm_ptr,
          mca_pml_csum_comm_proc_t *proc,
          mca_pml_csum_recv_frag_t* frag);
|
||||
|
||||
/**
 * BTL callback for MATCH-protocol fragments (whole message fits in one
 * fragment).  Verifies the header checksum, takes a fast path when the
 * fragment carries the next expected sequence number and nothing is
 * queued out-of-order: match against posted receives, unpack the payload,
 * verify the data checksum, complete the request.  Otherwise falls back
 * to the generic matching logic.  Aborts the job on checksum mismatch.
 */
void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
                                           mca_btl_base_tag_t tag,
                                           mca_btl_base_descriptor_t* des,
                                           void* cbdata )
{
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_csum_match_hdr_t* hdr = (mca_pml_csum_match_hdr_t*)segments->seg_addr.pval;
    ompi_communicator_t *comm_ptr;
    mca_pml_csum_recv_request_t *match = NULL;
    mca_pml_csum_comm_t *comm;
    mca_pml_csum_comm_proc_t *proc;
    size_t num_segments = des->des_dst_cnt;
    size_t bytes_received = 0;
    uint16_t csum_received, csum=0;
    uint32_t csum_data;

    assert(num_segments <= MCA_BTL_DES_MAX_SEGMENTS);

    /* Runt fragment: cannot even hold a match header -- drop it. */
    if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_CSUM_MATCH_HDR_LEN) ) {
        return;
    }
    csum_hdr_ntoh(((mca_pml_csum_hdr_t*) hdr), MCA_PML_CSUM_HDR_TYPE_MATCH);

    /* Recompute the header checksum over the header with its csum field
     * zeroed (and the NBO flag cleared, since it was set after the sender
     * computed the checksum), then restore the field. */
    csum_received = hdr->hdr_common.hdr_csum;
    hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
    csum = opal_csum16(hdr, OMPI_PML_CSUM_MATCH_HDR_LEN);
    hdr->hdr_common.hdr_csum = csum_received;

    OPAL_OUTPUT_VERBOSE((5, mca_pml_base_output,
                         "%s:%s:%d common_hdr: %02x:%02x:%04x match_hdr: %04x:%04x:%08x:%08x:%08x",
                         OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__,
                         hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, hdr->hdr_common.hdr_csum,
                         hdr->hdr_ctx, hdr->hdr_seq, hdr->hdr_src, hdr->hdr_tag, hdr->hdr_csum));

    if (csum_received != csum) {
        opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x != computed csum:0x%04x\n",
                    OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
        dump_csum_error_data(segments, 1);
        ompi_rte_abort(-1,NULL);
    }

    /* communicator pointer */
    comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
    if(OPAL_UNLIKELY(NULL == comm_ptr)) {
        /* This is a special case. A message for a not yet existing
         * communicator can happen. Instead of doing a matching we
         * will temporarily add it to a pending queue in the PML.
         * Later on, when the communicator is completely instantiated,
         * this pending queue will be searched and all matching fragments
         * moved to the right communicator.
         */
        append_frag_to_list( &mca_pml_csum.non_existing_communicator_pending,
                             btl, hdr, segments, num_segments, NULL );
        return;
    }
    comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm;

    /* source sequence number */
    proc = &comm->procs[hdr->hdr_src];

    /* We generate the MSG_ARRIVED event as soon as the PML is aware
     * of a matching fragment arrival, independent of whether it is
     * received in the correct order or not. This will allow the tools
     * to figure out if the messages are not received in the correct
     * order (if multiple network interfaces).
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);

    /* get next expected message sequence number - if threaded
     * run, lock to make sure that if another thread is processing
     * a frag from the same message a match is made only once.
     * Also, this prevents other posted receives (for a pair of
     * end points) from being processed, and potentially "losing"
     * the fragment.
     */
    OPAL_THREAD_LOCK(&comm->matching_lock);

    /* get sequence number of next message that can be processed */
    if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) ||
                     (opal_list_get_size(&proc->frags_cant_match) > 0 ))) {
        goto slow_path;
    }

    /* This is the sequence number we were expecting, so we can try
     * matching it to already posted receives.
     */

    /* We're now expecting the next sequence number. */
    proc->expected_sequence++;

    /* We generate the SEARCH_POSTED_QUEUE only when the message is
     * received in the correct sequence. Otherwise, we delay the event
     * generation until we reach the correct sequence number.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);

    match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, NULL);

    /* The match is over. We generate the SEARCH_POSTED_Q_END here,
     * before going into the mca_pml_csum_check_cantmatch_for_match so
     * we can make a difference for the searching time for all
     * messages.
     */
    PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
                           hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);

    /* release matching lock before processing fragment */
    OPAL_THREAD_UNLOCK(&comm->matching_lock);

    if(OPAL_LIKELY(match)) {
        bytes_received = segments->seg_len - OMPI_PML_CSUM_MATCH_HDR_LEN;
        match->req_recv.req_bytes_packed = bytes_received;

        MCA_PML_CSUM_RECV_REQUEST_MATCHED(match, hdr);
        if(match->req_bytes_expected > 0) {
            struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS];
            uint32_t iov_count = 1;

            /*
             * Make user buffer accessible (defined) before unpacking.
             */
            MEMCHECKER(
                memchecker_call(&opal_memchecker_base_mem_defined,
                                match->req_recv.req_base.req_addr,
                                match->req_recv.req_base.req_count,
                                match->req_recv.req_base.req_datatype);
            );

            /* First iovec skips the match header; remaining segments are
             * taken whole. */
            iov[0].iov_len = bytes_received;
            iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval +
                                              OMPI_PML_CSUM_MATCH_HDR_LEN);
            while (iov_count < num_segments) {
                bytes_received += segments[iov_count].seg_len;
                iov[iov_count].iov_len = segments[iov_count].seg_len;
                iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval);
                iov_count++;
            }
            opal_convertor_unpack( &match->req_recv.req_base.req_convertor,
                                   iov,
                                   &iov_count,
                                   &bytes_received );
            match->req_bytes_received = bytes_received;
            /*
             * Unpacking finished, make the user buffer inaccessible again.
             */
            MEMCHECKER(
                memchecker_call(&opal_memchecker_base_mem_noaccess,
                                match->req_recv.req_base.req_addr,
                                match->req_recv.req_base.req_count,
                                match->req_recv.req_base.req_datatype);
            );
        }
        if (bytes_received > 0) {
            /* Verify the data checksum accumulated by the convertor
             * against the value the sender put in the header. */
            csum_data = match->req_recv.req_base.req_convertor.checksum;

            OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
                                 "%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
                                 OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_csum, csum_received, (unsigned long)bytes_received));

            if (csum_data != hdr->hdr_csum) {
                opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
                            OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_csum, csum_data);
                dump_csum_error_data(segments, num_segments);
                ompi_rte_abort(-1,NULL);
            }
        }

        /* no need to check if complete we know we are.. */
        /* don't need a rmb as that is for checking */
        recv_request_pml_complete(match);
    }
    return;

 slow_path:
    OPAL_THREAD_UNLOCK(&comm->matching_lock);
    mca_pml_csum_recv_frag_match(btl, hdr, segments,
                                 num_segments, MCA_PML_CSUM_HDR_TYPE_MATCH);
}
|
||||
|
||||
|
||||
/**
 * BTL callback for RNDV (rendezvous) fragments.  Converts the header to
 * host byte order, verifies the header checksum (recomputed with the csum
 * field zeroed and, on heterogeneous builds, the NBO flag cleared), then
 * hands the fragment to the generic matching logic.  Aborts the job on
 * checksum mismatch.
 */
void mca_pml_csum_recv_frag_callback_rndv(mca_btl_base_module_t* btl,
                                          mca_btl_base_tag_t tag,
                                          mca_btl_base_descriptor_t* des,
                                          void* cbdata )
{
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
    uint16_t csum_received, csum;

    /* Runt fragment: cannot hold a common header -- drop it. */
    if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
        return;
    }
    csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_RNDV);

    csum_received = hdr->hdr_common.hdr_csum;
    hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
    csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t));
    hdr->hdr_common.hdr_csum = csum_received;
    if (csum_received != csum) {
        opal_output(0, "%s:%s:%d: Invalid \'rndv header\' - received csum:0x%04x != computed csum:0x%04x\n",
                    OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
        dump_csum_error_data(segments, 1);
        ompi_rte_abort(-1,NULL);
    }

    mca_pml_csum_recv_frag_match(btl, &hdr->hdr_match, segments,
                                 des->des_dst_cnt, MCA_PML_CSUM_HDR_TYPE_RNDV);
    return;
}
|
||||
|
||||
void mca_pml_csum_recv_frag_callback_rget(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_dst;
|
||||
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_RGET);
|
||||
mca_pml_csum_recv_frag_match(btl, &hdr->hdr_match, segments,
|
||||
des->des_dst_cnt, MCA_PML_CSUM_HDR_TYPE_RGET);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * BTL callback for ACK fragments.  Verifies the header checksum, records
 * the receiver's request handle on the sending request, schedules the
 * copy in/out of the remaining data, and, unless the request started as
 * an RGET, decrements its state counter; finally schedules further sends
 * if the request is not yet complete.  Aborts the job on checksum
 * mismatch.
 */
void mca_pml_csum_recv_frag_callback_ack(mca_btl_base_module_t* btl,
                                         mca_btl_base_tag_t tag,
                                         mca_btl_base_descriptor_t* des,
                                         void* cbdata )
{
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
    mca_pml_csum_send_request_t* sendreq;
    uint16_t csum_received, csum;

    /* Runt fragment: cannot hold a common header -- drop it. */
    if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
        return;
    }

    csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_ACK);

    /* Recompute the checksum with the csum field zeroed (and NBO flag
     * cleared on heterogeneous builds), then restore the field. */
    csum_received = hdr->hdr_common.hdr_csum;
    hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
    csum = opal_csum16(hdr, sizeof(mca_pml_csum_ack_hdr_t));
    hdr->hdr_common.hdr_csum = csum_received;
    OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
                         "%s Received \'ACK\' with header csum:0x%04x\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum));
    if (csum_received != csum) {
        opal_output(0, "%s:%s:%d: Invalid \'ACK header\' - received csum:0x%04x != computed csum:0x%04x\n",
                    OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
        dump_csum_error_data(segments, 1);
        ompi_rte_abort(-1,NULL);
    }

    /* The ACK carries our own request pointer back to us. */
    sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_ack.hdr_src_req.pval;
    sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;

    /* if the request should be delivered entirely by copy in/out
     * then throttle sends */
    if(hdr->hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA)
        sendreq->req_throttle_sends = true;

    mca_pml_csum_send_request_copy_in_out(sendreq,
                                          hdr->hdr_ack.hdr_send_offset,
                                          sendreq->req_send.req_bytes_packed -
                                          hdr->hdr_ack.hdr_send_offset);

    if (sendreq->req_state != 0) {
        /* Typical receipt of an ACK message causes req_state to be
         * decremented. However, a send request that started as an
         * RGET request can become a RNDV. For example, when the
         * receiver determines that its receive buffer is not
         * contiguous and therefore cannot support the RGET
         * protocol. A send request that started with the RGET
         * protocol has req_state == 0 and as such should not be
         * decremented.
         */
        OPAL_THREAD_ADD32(&sendreq->req_state, -1);
    }

    if(send_request_pml_complete_check(sendreq) == false)
        mca_pml_csum_send_request_schedule(sendreq);

    return;
}
|
||||
|
||||
/**
 * BTL callback for FRAG (pipeline data) fragments.  Verifies the header
 * checksum and forwards the payload to the receive request named in the
 * header for unpacking/progress.  Aborts the job on checksum mismatch.
 */
void mca_pml_csum_recv_frag_callback_frag(mca_btl_base_module_t* btl,
                                          mca_btl_base_tag_t tag,
                                          mca_btl_base_descriptor_t* des,
                                          void* cbdata ) {
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
    mca_pml_csum_recv_request_t* recvreq;
    uint16_t csum_received, csum;

    /* Runt fragment: cannot hold a common header -- drop it. */
    if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
        return;
    }
    csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FRAG);

    /* Recompute the checksum with the csum field zeroed (and NBO flag
     * cleared on heterogeneous builds), then restore the field. */
    csum_received = hdr->hdr_common.hdr_csum;
    hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
    csum = opal_csum16(hdr, sizeof(mca_pml_csum_frag_hdr_t));
    hdr->hdr_common.hdr_csum = csum_received;
    if(csum_received != csum) {
        opal_output(0, "%s:%s:%d: Invalid \'frag header\' - received csum:0x%04x != computed csum:0x%04x\n",
                    OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
        dump_csum_error_data(segments, 1);
        ompi_rte_abort(-1,NULL);
    }

    /* The header names the destination receive request directly. */
    recvreq = (mca_pml_csum_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
    mca_pml_csum_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt);

    return;
}
|
||||
|
||||
|
||||
/**
 * BTL callback for PUT control fragments.  Verifies the header checksum
 * and hands the RDMA descriptor to the send request named in the header
 * so it can start the PUT.  Aborts the job on checksum mismatch.
 */
void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl,
                                         mca_btl_base_tag_t tag,
                                         mca_btl_base_descriptor_t* des,
                                         void* cbdata ) {
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
    mca_pml_csum_send_request_t* sendreq;
    uint16_t csum_received, csum;

    /* Runt fragment: cannot hold a common header -- drop it. */
    if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
        return;
    }

    csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_PUT);

    /* Recompute the checksum with the csum field zeroed (and NBO flag
     * cleared on heterogeneous builds), then restore the field. */
    csum_received = hdr->hdr_common.hdr_csum;
    hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
    csum = opal_csum16(hdr, sizeof(mca_pml_csum_rdma_hdr_t));
    hdr->hdr_common.hdr_csum = csum_received;
    OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
                         "%s Received \'PUT\' with header csum:0x%04x\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum));
    if(csum_received != csum) {
        opal_output(0, "%s:%s:%d: Invalid \'PUT header\' - received csum:0x%04x != computed csum:0x%04x\n",
                    OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
        dump_csum_error_data(segments, 1);
        ompi_rte_abort(-1,NULL);
    }

    /* The header names the local send request that must issue the PUT. */
    sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_rdma.hdr_req.pval;
    mca_pml_csum_send_request_put(sendreq,btl,&hdr->hdr_rdma);

    return;
}
|
||||
|
||||
|
||||
/**
 * BTL callback for FIN fragments (RDMA completion notification).
 * Verifies the header checksum, then invokes the completion callback of
 * the RDMA descriptor named in the header, propagating success/failure
 * from the hdr_fail flag.  Aborts the job on checksum mismatch.
 */
void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl,
                                         mca_btl_base_tag_t tag,
                                         mca_btl_base_descriptor_t* des,
                                         void* cbdata ) {
    mca_btl_base_segment_t* segments = des->des_dst;
    mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
    mca_btl_base_descriptor_t* rdma;
    uint16_t csum_received, csum;

    /* Runt fragment: cannot hold a common header -- drop it. */
    if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
        return;
    }

    csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FIN);

    /* Recompute the checksum with the csum field zeroed (and NBO flag
     * cleared on heterogeneous builds), then restore the field. */
    csum_received = hdr->hdr_common.hdr_csum;
    hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
    csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t));
    hdr->hdr_common.hdr_csum = csum_received;
    OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
                         "%s Received \'FIN\' with header csum:0x%04x\n",OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),csum));
    if(csum_received != csum) {
        opal_output(0, "%s:%s:%d: Invalid \'FIN header\' - received csum:0x%04x != computed csum:0x%04x\n",
                    OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
        dump_csum_error_data(segments, 1);
        ompi_rte_abort(-1,NULL);
    }

    /* The FIN names the local RDMA descriptor whose operation finished. */
    rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
    rdma->des_cbfunc(btl, NULL, rdma,
                     hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);

    return;
}
|
||||
|
||||
|
||||
|
||||
/* Largest representable sequence number, used as an "empty queue" sentinel
 * by the matching code below.  Fully parenthesized and WITHOUT a trailing
 * semicolon (CERT PRE11-C): the original `... ;` definition injected a
 * stray semicolon into every expression expansion, which only compiled by
 * accident in statement position and would break any other use. */
#define PML_MAX_SEQ (~((mca_pml_sequence_t)0))
|
||||
|
||||
/* Peek at the first posted receive on a queue, or NULL when it is empty. */
static inline mca_pml_csum_recv_request_t* get_posted_recv(opal_list_t *queue)
{
    if (0 != opal_list_get_size(queue)) {
        return (mca_pml_csum_recv_request_t*)opal_list_get_first(queue);
    }
    return NULL;
}
|
||||
|
||||
static inline mca_pml_csum_recv_request_t* get_next_posted_recv(
|
||||
opal_list_t *queue,
|
||||
mca_pml_csum_recv_request_t* req)
|
||||
{
|
||||
opal_list_item_t *i = opal_list_get_next((opal_list_item_t*)req);
|
||||
|
||||
if(opal_list_get_end(queue) == i)
|
||||
return NULL;
|
||||
|
||||
return (mca_pml_csum_recv_request_t*)i;
|
||||
}
|
||||
|
||||
/*
 * Walk the wild (MPI_ANY_SOURCE) and source-specific posted-receive queues
 * in parallel, always examining next the receive that was posted earlier
 * (lower request sequence number), so MPI matching order is respected.
 * Returns the first receive whose tag accepts this fragment, removed from
 * its queue, or NULL when neither queue holds a match.
 * NOTE(review): "incomming" is a historical misspelling; renaming would
 * require touching the call site as well.
 */
static mca_pml_csum_recv_request_t *match_incomming(
        mca_pml_csum_match_hdr_t *hdr, mca_pml_csum_comm_t *comm,
        mca_pml_csum_comm_proc_t *proc)
{
    mca_pml_csum_recv_request_t *specific_recv, *wild_recv;
    mca_pml_sequence_t wild_recv_seq, specific_recv_seq;
    int tag = hdr->hdr_tag;

    specific_recv = get_posted_recv(&proc->specific_receives);
    wild_recv = get_posted_recv(&comm->wild_receives);

    /* An exhausted queue is represented by the PML_MAX_SEQ sentinel. */
    wild_recv_seq = wild_recv ?
        wild_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ;
    specific_recv_seq = specific_recv ?
        specific_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ;

    /* they are equal only if both are PML_MAX_SEQ */
    while(wild_recv_seq != specific_recv_seq) {
        mca_pml_csum_recv_request_t **match;
        opal_list_t *queue;
        int req_tag;
        mca_pml_sequence_t *seq;

        /* Alias whichever queue currently holds the older posted receive. */
        if (OPAL_UNLIKELY(wild_recv_seq < specific_recv_seq)) {
            match = &wild_recv;
            queue = &comm->wild_receives;
            seq = &wild_recv_seq;
        } else {
            match = &specific_recv;
            queue = &proc->specific_receives;
            seq = &specific_recv_seq;
        }

        /* Negative tags (reserved) are never matched by MPI_ANY_TAG. */
        req_tag = (*match)->req_recv.req_base.req_tag;
        if(req_tag == tag || (req_tag == OMPI_ANY_TAG && tag >= 0)) {
            opal_list_remove_item(queue, (opal_list_item_t*)(*match));
            PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
                                    &((*match)->req_recv.req_base), PERUSE_RECV);
            return *match;
        }

        /* No tag match: advance this queue and refresh its sentinel. */
        *match = get_next_posted_recv(queue, *match);
        *seq = (*match) ? (*match)->req_recv.req_base.req_sequence : PML_MAX_SEQ;
    }

    return NULL;
}
|
||||
|
||||
/*
 * Match a single fragment against the posted receives of (comm_ptr, proc).
 * Probe requests are completed in place and matching is retried, so a
 * single fragment can satisfy any number of probes plus one real receive.
 * When no receive matches, the fragment is appended to the per-proc
 * unexpected queue (allocating a frag copy when the caller passed NULL)
 * and NULL is returned.  Caller must hold the communicator matching lock.
 */
static mca_pml_csum_recv_request_t*
match_one(mca_btl_base_module_t *btl,
          mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
          size_t num_segments, ompi_communicator_t *comm_ptr,
          mca_pml_csum_comm_proc_t *proc,
          mca_pml_csum_recv_frag_t* frag)
{
    mca_pml_csum_recv_request_t *match;
    mca_pml_csum_comm_t *comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm;

    do {
        match = match_incomming(hdr, comm, proc);

        /* if match found, process data */
        if(OPAL_LIKELY(NULL != match)) {
            match->req_recv.req_base.req_proc = proc->ompi_proc;

            if(OPAL_UNLIKELY(MCA_PML_REQUEST_PROBE == match->req_recv.req_base.req_type)) {
                /* complete the probe */
                mca_pml_csum_recv_request_matched_probe(match, btl, segments,
                                                        num_segments);
                /* attempt to match actual request */
                continue;
            }

            PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_MSG_MATCH_POSTED_REQ,
                                    &(match->req_recv.req_base), PERUSE_RECV);
            return match;
        }

        /* if no match found, place on unexpected queue */
        append_frag_to_list(&proc->unexpected_frags, btl, hdr, segments,
                            num_segments, frag);
        PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr,
                               hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
        return NULL;
    } while(true);
}
|
||||
|
||||
/*
 * Scan the per-proc list of fragments that arrived out of order for the
 * one carrying the next expected sequence number.  If found, unlink it
 * from the list and return it; otherwise return NULL.
 */
static mca_pml_csum_recv_frag_t* check_cantmatch_for_match(mca_pml_csum_comm_proc_t *proc)
{
    opal_list_t *pending = &proc->frags_cant_match;
    opal_list_item_t *item;

    for (item = opal_list_get_first(pending);
         item != opal_list_get_end(pending);
         item = opal_list_get_next(item)) {
        mca_pml_csum_recv_frag_t *candidate = (mca_pml_csum_recv_frag_t*)item;

        /* only the fragment with the next expected sequence qualifies */
        if (candidate->hdr.hdr_match.hdr_seq == proc->expected_sequence) {
            opal_list_remove_item(pending, item);
            return candidate;
        }
    }

    return NULL;
}
|
||||
|
||||
/**
|
||||
* RCS/CTS receive side matching
|
||||
*
|
||||
* @param hdr list of parameters needed for matching
|
||||
 * This list is also embedded in frag,
|
||||
* but this allows to save a memory copy when
|
||||
* a match is made in this routine. (IN)
|
||||
* @param frag pointer to receive fragment which we want
|
||||
* to match (IN/OUT). If a match is not made,
|
||||
* hdr is copied to frag.
|
||||
* @param match_made parameter indicating if we matched frag/
|
||||
* hdr (OUT)
|
||||
* @param additional_matches if a match is made with frag, we
|
||||
* may be able to match fragments that previously
|
||||
* have arrived out-of-order. If this is the
|
||||
* case, the associated fragment descriptors are
|
||||
* put on this list for further processing. (OUT)
|
||||
*
|
||||
* @return OMPI error code
|
||||
*
|
||||
* This routine is used to try and match a newly arrived message fragment
|
||||
* to pre-posted receives. The following assumptions are made
|
||||
* - fragments are received out of order
|
||||
* - for long messages, e.g. more than one fragment, a RTS/CTS algorithm
|
||||
* is used.
|
||||
* - 2nd and greater fragments include a receive descriptor pointer
|
||||
* - fragments may be dropped
|
||||
* - fragments may be corrupt
|
||||
* - this routine may be called simultaneously by more than one thread
|
||||
*/
|
||||
static int mca_pml_csum_recv_frag_match( mca_btl_base_module_t *btl,
|
||||
mca_pml_csum_match_hdr_t *hdr,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments,
|
||||
int type)
|
||||
{
|
||||
/* local variables */
|
||||
uint16_t next_msg_seq_expected, frag_msg_seq;
|
||||
ompi_communicator_t *comm_ptr;
|
||||
mca_pml_csum_recv_request_t *match = NULL;
|
||||
mca_pml_csum_comm_t *comm;
|
||||
mca_pml_csum_comm_proc_t *proc;
|
||||
mca_pml_csum_recv_frag_t* frag = NULL;
|
||||
|
||||
/* communicator pointer */
|
||||
comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
|
||||
if(OPAL_UNLIKELY(NULL == comm_ptr)) {
|
||||
/* This is a special case. A message for a not yet existing
|
||||
* communicator can happens. Instead of doing a matching we
|
||||
* will temporarily add it the a pending queue in the PML.
|
||||
* Later on, when the communicator is completely instantiated,
|
||||
* this pending queue will be searched and all matching fragments
|
||||
* moved to the right communicator.
|
||||
*/
|
||||
append_frag_to_list( &mca_pml_csum.non_existing_communicator_pending,
|
||||
btl, hdr, segments, num_segments, NULL );
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm;
|
||||
|
||||
/* source sequence number */
|
||||
frag_msg_seq = hdr->hdr_seq;
|
||||
proc = &comm->procs[hdr->hdr_src];
|
||||
|
||||
/**
|
||||
* We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
|
||||
* fragment arrival. Independing if it is received on the correct order or not.
|
||||
* This will allow the tools to figure out if the messages are not received in the
|
||||
* correct order (if multiple network interfaces).
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* get next expected message sequence number - if threaded
|
||||
* run, lock to make sure that if another thread is processing
|
||||
* a frag from the same message a match is made only once.
|
||||
* Also, this prevents other posted receives (for a pair of
|
||||
* end points) from being processed, and potentially "loosing"
|
||||
* the fragment.
|
||||
*/
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
|
||||
/* get sequence number of next message that can be processed */
|
||||
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
|
||||
if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected))
|
||||
goto wrong_seq;
|
||||
|
||||
/*
|
||||
* This is the sequence number we were expecting,
|
||||
* so we can try matching it to already posted
|
||||
* receives.
|
||||
*/
|
||||
|
||||
out_of_order_match:
|
||||
/* We're now expecting the next sequence number. */
|
||||
proc->expected_sequence++;
|
||||
|
||||
/**
|
||||
* We generate the SEARCH_POSTED_QUEUE only when the message is received
|
||||
* in the correct sequence. Otherwise, we delay the event generation until
|
||||
* we reach the correct sequence number.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag);
|
||||
|
||||
/**
|
||||
* The match is over. We generate the SEARCH_POSTED_Q_END here, before going
|
||||
* into the mca_pml_csum_check_cantmatch_for_match so we can make a difference
|
||||
* for the searching time for all messages.
|
||||
*/
|
||||
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
|
||||
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
|
||||
|
||||
/* release matching lock before processing fragment */
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
if(OPAL_LIKELY(match)) {
|
||||
switch(type) {
|
||||
case MCA_PML_CSUM_HDR_TYPE_MATCH:
|
||||
mca_pml_csum_recv_request_progress_match(match, btl, segments, num_segments);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_RNDV:
|
||||
mca_pml_csum_recv_request_progress_rndv(match, btl, segments, num_segments);
|
||||
break;
|
||||
case MCA_PML_CSUM_HDR_TYPE_RGET:
|
||||
mca_pml_csum_recv_request_progress_rget(match, btl, segments, num_segments);
|
||||
break;
|
||||
}
|
||||
|
||||
if(OPAL_UNLIKELY(frag))
|
||||
MCA_PML_CSUM_RECV_FRAG_RETURN(frag);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that new message has arrived, check to see if
|
||||
* any fragments on the c_c_frags_cant_match list
|
||||
* may now be used to form new matchs
|
||||
*/
|
||||
if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) {
|
||||
OPAL_THREAD_LOCK(&comm->matching_lock);
|
||||
if((frag = check_cantmatch_for_match(proc))) {
|
||||
hdr = &frag->hdr.hdr_match;
|
||||
segments = frag->segments;
|
||||
num_segments = frag->num_segments;
|
||||
btl = frag->btl;
|
||||
type = hdr->hdr_common.hdr_type;
|
||||
goto out_of_order_match;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
wrong_seq:
|
||||
/*
|
||||
* This message comes after the next expected, so it
|
||||
* is ahead of sequence. Save it for later.
|
||||
*/
|
||||
append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments,
|
||||
num_segments, NULL);
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,175 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
|
||||
#ifndef MCA_PML_CSUM_RECVFRAG_H
|
||||
#define MCA_PML_CSUM_RECVFRAG_H
|
||||
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "pml_csum_hdr.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_pml_csum_buffer_t {
|
||||
size_t len;
|
||||
void * addr;
|
||||
};
|
||||
typedef struct mca_pml_csum_buffer_t mca_pml_csum_buffer_t;
|
||||
|
||||
|
||||
struct mca_pml_csum_recv_frag_t {
|
||||
ompi_free_list_item_t super;
|
||||
mca_pml_csum_hdr_t hdr;
|
||||
size_t num_segments;
|
||||
mca_btl_base_module_t* btl;
|
||||
mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS];
|
||||
mca_pml_csum_buffer_t buffers[MCA_BTL_DES_MAX_SEGMENTS];
|
||||
unsigned char addr[1];
|
||||
};
|
||||
typedef struct mca_pml_csum_recv_frag_t mca_pml_csum_recv_frag_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_csum_recv_frag_t);
|
||||
|
||||
|
||||
#define MCA_PML_CSUM_RECV_FRAG_ALLOC(frag,rc) \
|
||||
do { \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_WAIT(&mca_pml_csum.recv_frags, item, rc); \
|
||||
frag = (mca_pml_csum_recv_frag_t*)item; \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define MCA_PML_CSUM_RECV_FRAG_INIT(frag, hdr, segs, cnt, btl ) \
|
||||
do { \
|
||||
size_t i, _size; \
|
||||
mca_btl_base_segment_t* macro_segments = frag->segments; \
|
||||
mca_pml_csum_buffer_t* buffers = frag->buffers; \
|
||||
unsigned char* _ptr = (unsigned char*)frag->addr; \
|
||||
/* init recv_frag */ \
|
||||
frag->btl = btl; \
|
||||
frag->hdr = *(mca_pml_csum_hdr_t*)hdr; \
|
||||
frag->num_segments = 1; \
|
||||
_size = segs[0].seg_len; \
|
||||
for( i = 1; i < cnt; i++ ) { \
|
||||
_size += segs[i].seg_len; \
|
||||
} \
|
||||
/* copy over data */ \
|
||||
if(_size <= mca_pml_csum.unexpected_limit ) { \
|
||||
macro_segments[0].seg_addr.pval = frag->addr; \
|
||||
} else { \
|
||||
buffers[0].len = _size; \
|
||||
buffers[0].addr = (char*) \
|
||||
mca_pml_csum.allocator->alc_alloc( mca_pml_csum.allocator, \
|
||||
buffers[0].len, \
|
||||
0, NULL); \
|
||||
_ptr = (unsigned char*)(buffers[0].addr); \
|
||||
macro_segments[0].seg_addr.pval = buffers[0].addr; \
|
||||
} \
|
||||
macro_segments[0].seg_len = _size; \
|
||||
for( i = 0; i < cnt; i++ ) { \
|
||||
memcpy( _ptr, segs[i].seg_addr.pval, segs[i].seg_len); \
|
||||
_ptr += segs[i].seg_len; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define MCA_PML_CSUM_RECV_FRAG_RETURN(frag) \
|
||||
do { \
|
||||
if( frag->segments[0].seg_len > mca_pml_csum.unexpected_limit ) { \
|
||||
/* return buffers */ \
|
||||
mca_pml_csum.allocator->alc_free( mca_pml_csum.allocator, \
|
||||
frag->buffers[0].addr ); \
|
||||
} \
|
||||
frag->num_segments = 0; \
|
||||
\
|
||||
/* return recv_frag */ \
|
||||
OMPI_FREE_LIST_RETURN(&mca_pml_csum.recv_frags, \
|
||||
(ompi_free_list_item_t*)frag); \
|
||||
} while(0)
|
||||
|
||||
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (match).
|
||||
*/
|
||||
|
||||
extern void mca_pml_csum_recv_frag_callback_match( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (rndv).
|
||||
*/
|
||||
|
||||
extern void mca_pml_csum_recv_frag_callback_rndv( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (rget).
|
||||
*/
|
||||
|
||||
extern void mca_pml_csum_recv_frag_callback_rget( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (ack).
|
||||
*/
|
||||
|
||||
extern void mca_pml_csum_recv_frag_callback_ack( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (frag).
|
||||
*/
|
||||
|
||||
extern void mca_pml_csum_recv_frag_callback_frag( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (put).
|
||||
*/
|
||||
|
||||
extern void mca_pml_csum_recv_frag_callback_put( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
/**
|
||||
* Callback from BTL on receipt of a recv_frag (fin).
|
||||
*/
|
||||
|
||||
extern void mca_pml_csum_recv_frag_callback_fin( mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* descriptor,
|
||||
void* cbdata );
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,425 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2010 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef OMPI_PML_CSUM_RECV_REQUEST_H
|
||||
#define OMPI_PML_CSUM_RECV_REQUEST_H
|
||||
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_rdma.h"
|
||||
#include "pml_csum_rdmafrag.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/pml/csum/pml_csum_comm.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/pml/base/pml_base_recvreq.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_pml_csum_recv_request_t {
|
||||
mca_pml_base_recv_request_t req_recv;
|
||||
ompi_ptr_t remote_req_send;
|
||||
int32_t req_lock;
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_received; /**< amount of data transferred into the user buffer */
|
||||
size_t req_bytes_expected; /**< local size of the data as suggested by the user */
|
||||
size_t req_rdma_offset;
|
||||
size_t req_send_offset;
|
||||
uint32_t req_rdma_cnt;
|
||||
uint32_t req_rdma_idx;
|
||||
bool req_pending;
|
||||
bool req_ack_sent; /**< whether ack was sent to the sender */
|
||||
bool req_match_received; /**< Prevent request to be completed prematurely */
|
||||
opal_mutex_t lock;
|
||||
mca_pml_csum_com_btl_t req_rdma[1];
|
||||
};
|
||||
typedef struct mca_pml_csum_recv_request_t mca_pml_csum_recv_request_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_csum_recv_request_t);
|
||||
|
||||
static inline bool lock_recv_request(mca_pml_csum_recv_request_t *recvreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1;
|
||||
}
|
||||
|
||||
static inline bool unlock_recv_request(mca_pml_csum_recv_request_t *recvreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate a recv request from the modules free list.
|
||||
*
|
||||
* @param rc (OUT) OMPI_SUCCESS or error status on failure.
|
||||
* @return Receive request.
|
||||
*/
|
||||
#define MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc) \
|
||||
do { \
|
||||
ompi_free_list_item_t* item; \
|
||||
rc = OMPI_SUCCESS; \
|
||||
OMPI_FREE_LIST_GET(&mca_pml_base_recv_requests, item, rc); \
|
||||
recvreq = (mca_pml_csum_recv_request_t*)item; \
|
||||
} while(0)
|
||||
|
||||
|
||||
/**
|
||||
* Initialize a receive request with call parameters.
|
||||
*
|
||||
* @param request (IN) Receive request.
|
||||
* @param addr (IN) User buffer.
|
||||
* @param count (IN) Number of elements of indicated datatype.
|
||||
* @param datatype (IN) User defined datatype.
|
||||
* @param src (IN) Source rank w/in the communicator.
|
||||
* @param tag (IN) User defined tag.
|
||||
* @param comm (IN) Communicator.
|
||||
* @param persistent (IN) Is this a ersistent request.
|
||||
*/
|
||||
#define MCA_PML_CSUM_RECV_REQUEST_INIT( request, \
|
||||
addr, \
|
||||
count, \
|
||||
datatype, \
|
||||
src, \
|
||||
tag, \
|
||||
comm, \
|
||||
persistent) \
|
||||
do { \
|
||||
MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv, \
|
||||
addr, \
|
||||
count, \
|
||||
datatype, \
|
||||
src, \
|
||||
tag, \
|
||||
comm, \
|
||||
persistent); \
|
||||
} while(0)
|
||||
|
||||
/**
|
||||
* Mark the request as completed at MPI level for internal purposes.
|
||||
*
|
||||
* @param recvreq (IN) Receive request.
|
||||
*/
|
||||
#define MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE( recvreq ) \
|
||||
do { \
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
|
||||
&(recvreq->req_recv.req_base), PERUSE_RECV ); \
|
||||
ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Free the PML receive request
|
||||
*/
|
||||
#define MCA_PML_CSUM_RECV_REQUEST_RETURN(recvreq) \
|
||||
{ \
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \
|
||||
OMPI_FREE_LIST_RETURN( &mca_pml_base_recv_requests, \
|
||||
(ompi_free_list_item_t*)(recvreq)); \
|
||||
}
|
||||
|
||||
/**
|
||||
* Complete receive request. Request structure cannot be accessed after calling
|
||||
* this function any more.
|
||||
*
|
||||
* @param recvreq (IN) Receive request.
|
||||
*/
|
||||
static inline void
|
||||
recv_request_pml_complete(mca_pml_csum_recv_request_t *recvreq)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
assert(false == recvreq->req_recv.req_base.req_pml_complete);
|
||||
|
||||
if(recvreq->req_recv.req_bytes_packed > 0) {
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
|
||||
&recvreq->req_recv.req_base, PERUSE_RECV );
|
||||
}
|
||||
|
||||
for(i = 0; i < recvreq->req_rdma_cnt; i++) {
|
||||
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[i].btl_reg;
|
||||
if( NULL != btl_reg && btl_reg->mpool != NULL) {
|
||||
btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg );
|
||||
}
|
||||
}
|
||||
recvreq->req_rdma_cnt = 0;
|
||||
|
||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||
if(true == recvreq->req_recv.req_base.req_free_called) {
|
||||
MCA_PML_CSUM_RECV_REQUEST_RETURN(recvreq);
|
||||
} else {
|
||||
/* initialize request status */
|
||||
recvreq->req_recv.req_base.req_pml_complete = true;
|
||||
recvreq->req_recv.req_base.req_ompi.req_status._ucount =
|
||||
recvreq->req_bytes_received;
|
||||
if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) {
|
||||
recvreq->req_recv.req_base.req_ompi.req_status._ucount =
|
||||
recvreq->req_recv.req_bytes_packed;
|
||||
recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR =
|
||||
MPI_ERR_TRUNCATE;
|
||||
}
|
||||
MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE(recvreq);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
recv_request_pml_complete_check(mca_pml_csum_recv_request_t *recvreq)
|
||||
{
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
opal_atomic_rmb();
|
||||
#endif
|
||||
if(recvreq->req_match_received &&
|
||||
recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed &&
|
||||
lock_recv_request(recvreq)) {
|
||||
recv_request_pml_complete(recvreq);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
extern void mca_pml_csum_recv_req_start(mca_pml_csum_recv_request_t *req);
|
||||
#define MCA_PML_CSUM_RECV_REQUEST_START(r) mca_pml_csum_recv_req_start(r)
|
||||
|
||||
static inline void prepare_recv_req_converter(mca_pml_csum_recv_request_t *req)
|
||||
{
|
||||
if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) {
|
||||
opal_convertor_copy_and_prepare_for_recv(
|
||||
req->req_recv.req_base.req_proc->proc_convertor,
|
||||
&(req->req_recv.req_base.req_datatype->super),
|
||||
req->req_recv.req_base.req_count,
|
||||
req->req_recv.req_base.req_addr,
|
||||
0,
|
||||
&req->req_recv.req_base.req_convertor);
|
||||
opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor,
|
||||
&req->req_bytes_expected);
|
||||
}
|
||||
}
|
||||
|
||||
#define MCA_PML_CSUM_RECV_REQUEST_MATCHED(request, hdr) \
|
||||
recv_req_matched(request, hdr)
|
||||
|
||||
static inline void recv_req_matched(mca_pml_csum_recv_request_t *req,
|
||||
mca_pml_csum_match_hdr_t *hdr)
|
||||
{
|
||||
req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;
|
||||
req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
|
||||
req->req_match_received = true;
|
||||
#if OPAL_ENABLE_MULTI_THREADS
|
||||
opal_atomic_wmb();
|
||||
#endif
|
||||
if(req->req_recv.req_bytes_packed > 0) {
|
||||
if(MPI_ANY_SOURCE == req->req_recv.req_base.req_peer) {
|
||||
/* non wildcard prepared during post recv */
|
||||
prepare_recv_req_converter(req);
|
||||
}
|
||||
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN,
|
||||
&req->req_recv.req_base, PERUSE_RECV);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
#define MCA_PML_CSUM_RECV_REQUEST_UNPACK( request, \
|
||||
segments, \
|
||||
num_segments, \
|
||||
seg_offset, \
|
||||
data_offset, \
|
||||
bytes_received, \
|
||||
bytes_delivered) \
|
||||
do { \
|
||||
bytes_delivered = 0; \
|
||||
if(request->req_recv.req_bytes_packed > 0) { \
|
||||
struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; \
|
||||
uint32_t iov_count = 0; \
|
||||
size_t max_data = bytes_received; \
|
||||
size_t n, offset = seg_offset; \
|
||||
mca_btl_base_segment_t* segment = segments; \
|
||||
\
|
||||
OPAL_THREAD_LOCK(&request->lock); \
|
||||
for( n = 0; n < num_segments; n++, segment++ ) { \
|
||||
if(offset >= segment->seg_len) { \
|
||||
offset -= segment->seg_len; \
|
||||
} else { \
|
||||
iov[iov_count].iov_len = segment->seg_len - offset; \
|
||||
iov[iov_count].iov_base = (IOVBASE_TYPE*) \
|
||||
((unsigned char*)segment->seg_addr.pval + offset); \
|
||||
iov_count++; \
|
||||
offset = 0; \
|
||||
} \
|
||||
} \
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE, \
|
||||
&(recvreq->req_recv.req_base), max_data, \
|
||||
PERUSE_RECV); \
|
||||
opal_convertor_set_position( &(request->req_recv.req_base.req_convertor), \
|
||||
&data_offset ); \
|
||||
opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor, \
|
||||
iov, \
|
||||
&iov_count, \
|
||||
&max_data ); \
|
||||
bytes_delivered = max_data; \
|
||||
OPAL_THREAD_UNLOCK(&request->lock); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_csum_recv_request_progress_match(
|
||||
mca_pml_csum_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_csum_recv_request_progress_frag(
|
||||
mca_pml_csum_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_csum_recv_request_progress_rndv(
|
||||
mca_pml_csum_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_csum_recv_request_progress_rget(
|
||||
mca_pml_csum_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
void mca_pml_csum_recv_request_matched_probe(
|
||||
mca_pml_csum_recv_request_t* req,
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments,
|
||||
size_t num_segments);
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
int mca_pml_csum_recv_request_schedule_once(
|
||||
mca_pml_csum_recv_request_t* req, mca_bml_base_btl_t* start_bml_btl);
|
||||
|
||||
static inline int mca_pml_csum_recv_request_schedule_exclusive(
|
||||
mca_pml_csum_recv_request_t* req,
|
||||
mca_bml_base_btl_t* start_bml_btl)
|
||||
{
|
||||
int rc;
|
||||
|
||||
do {
|
||||
rc = mca_pml_csum_recv_request_schedule_once(req, start_bml_btl);
|
||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
|
||||
break;
|
||||
} while(!unlock_recv_request(req));
|
||||
|
||||
if(OMPI_SUCCESS == rc)
|
||||
recv_request_pml_complete_check(req);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline void mca_pml_csum_recv_request_schedule(
|
||||
mca_pml_csum_recv_request_t* req,
|
||||
mca_bml_base_btl_t* start_bml_btl)
|
||||
{
|
||||
if(!lock_recv_request(req))
|
||||
return;
|
||||
|
||||
(void)mca_pml_csum_recv_request_schedule_exclusive(req, start_bml_btl);
|
||||
}
|
||||
|
||||
#define MCA_PML_CSUM_ADD_ACK_TO_PENDING(P, S, D, O) \
|
||||
do { \
|
||||
mca_pml_csum_pckt_pending_t *_pckt; \
|
||||
int _rc; \
|
||||
\
|
||||
MCA_PML_CSUM_PCKT_PENDING_ALLOC(_pckt,_rc); \
|
||||
_pckt->hdr.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_ACK; \
|
||||
_pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \
|
||||
_pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \
|
||||
_pckt->hdr.hdr_ack.hdr_send_offset = (O); \
|
||||
_pckt->proc = (P); \
|
||||
_pckt->bml_btl = NULL; \
|
||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock); \
|
||||
opal_list_append(&mca_pml_csum.pckt_pending, \
|
||||
(opal_list_item_t*)_pckt); \
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); \
|
||||
} while(0)
|
||||
|
||||
int mca_pml_csum_recv_request_ack_send_btl(ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
|
||||
uint64_t hdr_rdma_offset, bool nordma);
|
||||
|
||||
static inline int mca_pml_csum_recv_request_ack_send(ompi_proc_t* proc,
|
||||
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
|
||||
bool nordma)
|
||||
{
|
||||
size_t i;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
mca_bml_base_endpoint_t* endpoint =
|
||||
(mca_bml_base_endpoint_t*)proc->proc_bml;
|
||||
|
||||
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
if(mca_pml_csum_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
|
||||
hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
MCA_PML_CSUM_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,
|
||||
hdr_send_offset);
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
int mca_pml_csum_recv_request_get_frag(mca_pml_csum_rdma_frag_t* frag);
|
||||
|
||||
/* This function tries to continue recvreq that stuck due to resource
|
||||
* unavailability. Recvreq is added to recv_pending list if scheduling of put
|
||||
* operation cannot be accomplished for some reason. */
|
||||
void mca_pml_csum_recv_request_process_pending(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,470 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2010 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef OMPI_PML_CSUM_SEND_REQUEST_H
|
||||
#define OMPI_PML_CSUM_SEND_REQUEST_H
|
||||
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "pml_csum_comm.h"
|
||||
#include "pml_csum_hdr.h"
|
||||
#include "pml_csum_rdma.h"
|
||||
#include "pml_csum_rdmafrag.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef enum {
|
||||
MCA_PML_CSUM_SEND_PENDING_NONE,
|
||||
MCA_PML_CSUM_SEND_PENDING_SCHEDULE,
|
||||
MCA_PML_CSUM_SEND_PENDING_START
|
||||
} mca_pml_csum_send_pending_t;
|
||||
|
||||
struct mca_pml_csum_send_request_t {
|
||||
mca_pml_base_send_request_t req_send;
|
||||
mca_bml_base_endpoint_t* req_endpoint;
|
||||
ompi_ptr_t req_recv;
|
||||
int32_t req_state;
|
||||
int32_t req_lock;
|
||||
bool req_throttle_sends;
|
||||
size_t req_pipeline_depth;
|
||||
size_t req_bytes_delivered;
|
||||
uint32_t req_rdma_cnt;
|
||||
mca_pml_csum_send_pending_t req_pending;
|
||||
opal_mutex_t req_send_range_lock;
|
||||
opal_list_t req_send_ranges;
|
||||
mca_pml_csum_com_btl_t req_rdma[1];
|
||||
};
|
||||
typedef struct mca_pml_csum_send_request_t mca_pml_csum_send_request_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_csum_send_request_t);
|
||||
|
||||
struct mca_pml_csum_send_range_t {
|
||||
ompi_free_list_item_t base;
|
||||
uint64_t range_send_offset;
|
||||
uint64_t range_send_length;
|
||||
int range_btl_idx;
|
||||
int range_btl_cnt;
|
||||
mca_pml_csum_com_btl_t range_btls[1];
|
||||
};
|
||||
typedef struct mca_pml_csum_send_range_t mca_pml_csum_send_range_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pml_csum_send_range_t);
|
||||
|
||||
static inline bool lock_send_request(mca_pml_csum_send_request_t *sendreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1;
|
||||
}
|
||||
|
||||
static inline bool unlock_send_request(mca_pml_csum_send_request_t *sendreq)
|
||||
{
|
||||
return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
add_request_to_send_pending(mca_pml_csum_send_request_t* sendreq,
|
||||
const mca_pml_csum_send_pending_t type,
|
||||
const bool append)
|
||||
{
|
||||
opal_list_item_t *item = (opal_list_item_t*)sendreq;
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||
sendreq->req_pending = type;
|
||||
if(append)
|
||||
opal_list_append(&mca_pml_csum.send_pending, item);
|
||||
else
|
||||
opal_list_prepend(&mca_pml_csum.send_pending, item);
|
||||
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
|
||||
}
|
||||
|
||||
static inline mca_pml_csum_send_request_t*
|
||||
get_request_from_send_pending(mca_pml_csum_send_pending_t *type)
|
||||
{
|
||||
mca_pml_csum_send_request_t *sendreq;
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
|
||||
sendreq = (mca_pml_csum_send_request_t*)
|
||||
opal_list_remove_first(&mca_pml_csum.send_pending);
|
||||
if(sendreq) {
|
||||
*type = sendreq->req_pending;
|
||||
sendreq->req_pending = MCA_PML_CSUM_SEND_PENDING_NONE;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
|
||||
|
||||
return sendreq;
|
||||
}
|
||||
|
||||
#define MCA_PML_CSUM_SEND_REQUEST_ALLOC( comm, \
|
||||
dst, \
|
||||
sendreq, \
|
||||
rc) \
|
||||
{ \
|
||||
ompi_proc_t *proc = ompi_comm_peer_lookup( comm, dst ); \
|
||||
ompi_free_list_item_t* item; \
|
||||
\
|
||||
rc = OMPI_ERR_OUT_OF_RESOURCE; \
|
||||
if( OPAL_LIKELY(NULL != proc) ) { \
|
||||
rc = OMPI_SUCCESS; \
|
||||
OMPI_FREE_LIST_WAIT(&mca_pml_base_send_requests, item, rc); \
|
||||
sendreq = (mca_pml_csum_send_request_t*)item; \
|
||||
sendreq->req_send.req_base.req_proc = proc; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
#define MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq, \
|
||||
buf, \
|
||||
count, \
|
||||
datatype, \
|
||||
dst, \
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent) \
|
||||
{ \
|
||||
MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
|
||||
buf, \
|
||||
count, \
|
||||
datatype, \
|
||||
dst, \
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent, \
|
||||
0); /* convertor_flags */ \
|
||||
(sendreq)->req_recv.pval = NULL; \
|
||||
}
|
||||
|
||||
|
||||
static inline void mca_pml_csum_free_rdma_resources(mca_pml_csum_send_request_t* sendreq)
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* return mpool resources */
|
||||
for(r = 0; r < sendreq->req_rdma_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg;
|
||||
if( NULL != reg && reg->mpool != NULL ) {
|
||||
reg->mpool->mpool_deregister(reg->mpool, reg);
|
||||
}
|
||||
}
|
||||
sendreq->req_rdma_cnt = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Start a send request.
|
||||
*/
|
||||
|
||||
#define MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc) \
|
||||
do { \
|
||||
rc = mca_pml_csum_send_request_start(sendreq); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*
|
||||
* Mark a send request as completed at the MPI level.
|
||||
*/
|
||||
|
||||
#define MCA_PML_CSUM_SEND_REQUEST_MPI_COMPLETE(sendreq, with_signal) \
|
||||
do { \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_SOURCE = \
|
||||
(sendreq)->req_send.req_base.req_comm->c_my_rank; \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_TAG = \
|
||||
(sendreq)->req_send.req_base.req_tag; \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \
|
||||
(sendreq)->req_send.req_base.req_ompi.req_status._ucount = \
|
||||
(sendreq)->req_send.req_bytes_packed; \
|
||||
ompi_request_complete( &((sendreq)->req_send.req_base.req_ompi), (with_signal) ); \
|
||||
\
|
||||
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
|
||||
&(sendreq->req_send.req_base), PERUSE_SEND); \
|
||||
} while(0)
|
||||
|
||||
/*
 * Release resources associated with a request: run the base-class
 * finalizer, then return the request structure to the global
 * send-request free list for reuse.
 */

#define MCA_PML_CSUM_SEND_REQUEST_RETURN(sendreq)                     \
    do {                                                              \
        /* Let the base handle the reference counts */                \
        MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send));       \
        OMPI_FREE_LIST_RETURN( &mca_pml_base_send_requests,           \
                               (ompi_free_list_item_t*)sendreq);      \
    } while(0)
|
||||
|
||||
|
||||
/*
|
||||
* The PML has completed a send request. Note that this request
|
||||
* may have been orphaned by the user or have already completed
|
||||
* at the MPI level.
|
||||
* This function will never be called directly from the upper level, as it
|
||||
* should only be an internal call to the PML.
|
||||
*
|
||||
*/
|
||||
static inline void
send_request_pml_complete(mca_pml_csum_send_request_t *sendreq)
{
    /* a request must be PML-completed at most once */
    assert(false == sendreq->req_send.req_base.req_pml_complete);

    if(sendreq->req_send.req_bytes_packed > 0) {
        PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
                                 &(sendreq->req_send.req_base), PERUSE_SEND);
    }

    /* return mpool resources (cached RDMA registrations) */
    mca_pml_csum_free_rdma_resources(sendreq);

    /* a buffered send whose data was copied into a separate bsend buffer
     * (req_addr differs from the user's original req_base.req_addr)
     * releases that buffer here */
    if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED &&
        sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) {
        mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq);
    }

    /* the MPI-level completion flag, pml_complete flag and the free-list
     * return must all happen atomically with respect to other threads
     * touching the request */
    OPAL_THREAD_LOCK(&ompi_request_lock);
    if(false == sendreq->req_send.req_base.req_ompi.req_complete) {
        /* Should only be called for long messages (maybe synchronous) */
        MCA_PML_CSUM_SEND_REQUEST_MPI_COMPLETE(sendreq, true);
    }
    sendreq->req_send.req_base.req_pml_complete = true;

    /* if the user already freed the request, recycle it now */
    if(sendreq->req_send.req_base.req_free_called) {
        MCA_PML_CSUM_SEND_REQUEST_RETURN(sendreq);
    }
    OPAL_THREAD_UNLOCK(&ompi_request_lock);
}
|
||||
|
||||
/* Returns true if the request was completed at the PML level by this call,
 * false if completion conditions are not yet met (or another thread holds
 * the scheduling lock). */
static inline bool
send_request_pml_complete_check(mca_pml_csum_send_request_t *sendreq)
{
#if OPAL_ENABLE_MULTI_THREADS
    /* make sure we observe up-to-date req_state / req_bytes_delivered
     * written by other threads before testing them below */
    opal_atomic_rmb();
#endif
    /* if no more events are expected for the request and the whole message is
     * already sent and send fragment scheduling isn't running in another
     * thread then complete the request on PML level. From now on, if user
     * called free on this request, the request structure can be reused for
     * another request or if the request is persistent it can be restarted */
    if(sendreq->req_state == 0 &&
       sendreq->req_bytes_delivered >= sendreq->req_send.req_bytes_packed
       && lock_send_request(sendreq)) {
        send_request_pml_complete(sendreq);
        return true;
    }

    return false;
}
|
||||
|
||||
/**
|
||||
* Schedule additional fragments
|
||||
*/
|
||||
int
|
||||
mca_pml_csum_send_request_schedule_once(mca_pml_csum_send_request_t*);
|
||||
|
||||
/* Run the fragment-scheduling loop while holding the request's scheduling
 * lock (the caller must have acquired it via lock_send_request()).  The
 * loop re-runs once per concurrent schedule attempt recorded by other
 * threads, until unlock_send_request() reports no pending attempts. */
static inline int
mca_pml_csum_send_request_schedule_exclusive(mca_pml_csum_send_request_t* sendreq)
{
    int rc;
    do {
        rc = mca_pml_csum_send_request_schedule_once(sendreq);
        /* no resources available: give up the loop; the request will be
         * retried when resources are freed */
        if(rc == OMPI_ERR_OUT_OF_RESOURCE)
            break;
    } while(!unlock_send_request(sendreq));

    /* scheduling may have pushed out the last bytes — check for completion */
    if(OMPI_SUCCESS == rc)
        send_request_pml_complete_check(sendreq);

    return rc;
}
|
||||
|
||||
static inline void
|
||||
mca_pml_csum_send_request_schedule(mca_pml_csum_send_request_t* sendreq)
|
||||
{
|
||||
/*
|
||||
* Only allow one thread in this routine for a given request.
|
||||
* However, we cannot block callers on a mutex, so simply keep track
|
||||
* of the number of times the routine has been called and run through
|
||||
* the scheduling logic once for every call.
|
||||
*/
|
||||
|
||||
if(!lock_send_request(sendreq))
|
||||
return;
|
||||
|
||||
mca_pml_csum_send_request_schedule_exclusive(sendreq);
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the specified request
|
||||
*/
|
||||
|
||||
int mca_pml_csum_send_request_start_buffered(
|
||||
mca_pml_csum_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
|
||||
int mca_pml_csum_send_request_start_copy(
|
||||
mca_pml_csum_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
|
||||
int mca_pml_csum_send_request_start_prepare(
|
||||
mca_pml_csum_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
|
||||
int mca_pml_csum_send_request_start_rdma(
|
||||
mca_pml_csum_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size);
|
||||
|
||||
int mca_pml_csum_send_request_start_rndv(
|
||||
mca_pml_csum_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
size_t size,
|
||||
int flags);
|
||||
|
||||
/* Select and launch the wire protocol for a send request on one BTL.
 * Messages that fit under the BTL's eager limit (minus our header) go out
 * in a single eager fragment; larger messages use buffered, RDMA, or
 * rendezvous protocols depending on send mode and convertor contiguity.
 * Returns OMPI_SUCCESS or an error code from the chosen start routine. */
static inline int
mca_pml_csum_send_request_start_btl( mca_pml_csum_send_request_t* sendreq,
                                     mca_bml_base_btl_t* bml_btl )
{
    size_t size = sendreq->req_send.req_bytes_packed;
    mca_btl_base_module_t* btl = bml_btl->btl;
    /* usable eager payload = BTL eager limit minus our PML header */
    size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_csum_hdr_t);
    int rc;

    if( OPAL_LIKELY(size <= eager_limit) ) {
        /* small message: protocol is chosen by the MPI send mode */
        switch(sendreq->req_send.req_send_mode) {
        case MCA_PML_BASE_SEND_SYNCHRONOUS:
            /* synchronous sends need an ack: use rendezvous even when small */
            rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size, 0);
            break;
        case MCA_PML_BASE_SEND_BUFFERED:
            rc = mca_pml_csum_send_request_start_copy(sendreq, bml_btl, size);
            break;
        case MCA_PML_BASE_SEND_COMPLETE:
            rc = mca_pml_csum_send_request_start_prepare(sendreq, bml_btl, size);
            break;
        default:
            /* standard mode: send in place when the BTL supports it and
             * there is data; otherwise copy into a BTL fragment */
            if (size != 0 && bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) {
                rc = mca_pml_csum_send_request_start_prepare(sendreq, bml_btl, size);
            } else {
                rc = mca_pml_csum_send_request_start_copy(sendreq, bml_btl, size);
            }
            break;
        }
    } else {
        /* large message: first fragment is capped by the (possibly smaller)
         * rendezvous eager limit */
        size = eager_limit;
        if(OPAL_UNLIKELY(btl->btl_rndv_eager_limit < eager_limit))
            size = btl->btl_rndv_eager_limit;
        if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {
            rc = mca_pml_csum_send_request_start_buffered(sendreq, bml_btl, size);
        } else if
            (opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor) == false) {
            /* contiguous user buffer: try RDMA if any BTL has it registered */
            unsigned char *base;
            opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base );

            if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_csum_rdma_btls(
                                                  sendreq->req_endpoint,
                                                  base,
                                                  sendreq->req_send.req_bytes_packed,
                                                  sendreq->req_rdma))) {
                rc = mca_pml_csum_send_request_start_rdma(sendreq, bml_btl,
                                                          sendreq->req_send.req_bytes_packed);
                if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
                    /* RDMA start failed: drop the registrations acquired above */
                    mca_pml_csum_free_rdma_resources(sendreq);
                }
            } else {
                /* no RDMA available: rendezvous, flagged as contiguous */
                rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size,
                                                          MCA_PML_CSUM_HDR_FLAGS_CONTIG);
            }
        } else {
            /* non-contiguous data: plain rendezvous with pack/unpack */
            rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size, 0);
        }
    }

    return rc;
}
|
||||
|
||||
/* Start a send request: reset its per-start state, assign the next send
 * sequence number for the destination peer, then try each eager BTL in
 * round-robin order.  If every BTL reports OMPI_ERR_OUT_OF_RESOURCE the
 * request is queued on the pending-start list for a later retry.
 * Returns OMPI_SUCCESS, OMPI_ERR_UNREACH if the peer has no BML endpoint,
 * or the first non-resource error from a BTL start attempt. */
static inline int
mca_pml_csum_send_request_start( mca_pml_csum_send_request_t* sendreq )
{
    mca_pml_csum_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm;
    mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
                                        sendreq->req_send.req_base.req_proc->proc_bml;
    size_t i;

    if( OPAL_UNLIKELY(endpoint == NULL) ) {
        return OMPI_ERR_UNREACH;
    }

    /* reset per-start bookkeeping (the request may be persistent/restarted) */
    sendreq->req_endpoint = endpoint;
    sendreq->req_state = 0;
    sendreq->req_lock = 0;
    sendreq->req_pipeline_depth = 0;
    sendreq->req_bytes_delivered = 0;
    sendreq->req_pending = MCA_PML_CSUM_SEND_PENDING_NONE;
    /* atomically claim the next sequence number toward this peer */
    sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
        &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);

    MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base );

    for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
        mca_bml_base_btl_t* bml_btl;
        int rc;

        /* select a btl */
        bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
        rc = mca_pml_csum_send_request_start_btl(sendreq, bml_btl);
        /* success or a hard error both end the loop; only
         * OMPI_ERR_OUT_OF_RESOURCE moves on to the next BTL */
        if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) )
            return rc;
    }
    /* every BTL was out of resources: defer the start */
    add_request_to_send_pending(sendreq, MCA_PML_CSUM_SEND_PENDING_START, true);

    return OMPI_SUCCESS;
}
|
||||
|
||||
/**
|
||||
* Initiate a put scheduled by the receiver.
|
||||
*/
|
||||
|
||||
void mca_pml_csum_send_request_put( mca_pml_csum_send_request_t* sendreq,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_csum_rdma_hdr_t* hdr );
|
||||
|
||||
int mca_pml_csum_send_request_put_frag(mca_pml_csum_rdma_frag_t* frag);
|
||||
|
||||
/* Attempt to make progress on send requests that were stalled by resource
 * exhaustion.  A sendreq is placed on the send_pending list either when
 * there were no resources to send its initial packet or when no resources
 * were available to schedule further data.  The reason it was queued is
 * recorded in the sendreq itself, and the corresponding operation is
 * retried once resources become available.  The bml_btl passed to this
 * function does not identify the sendreq's destination; it identifies the
 * BTL on which a resource was freed, so only that BTL should be considered
 * for sending packets. */
|
||||
void mca_pml_csum_send_request_process_pending(mca_bml_base_btl_t *bml_btl);
|
||||
|
||||
void mca_pml_csum_send_request_copy_in_out(mca_pml_csum_send_request_t *sendreq,
|
||||
uint64_t send_offset, uint64_t send_length);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OMPI_PML_CSUM_SEND_REQUEST_H */
|
@ -1,148 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "pml_csum.h"
|
||||
#include "pml_csum_recvreq.h"
|
||||
#include "pml_csum_sendreq.h"
|
||||
#include "ompi/memchecker.h"
|
||||
|
||||
|
||||
/* MPI_Start/MPI_Startall entry point for the csum PML: (re)activate
 * `count` persistent requests.  A request whose previous incarnation has
 * not yet completed at the PML level is marked free-on-complete and
 * replaced by a freshly initialized request; otherwise the existing
 * request object is reused.  Returns OMPI_SUCCESS or the first error
 * encountered (remaining requests are not started). */
int mca_pml_csum_start(size_t count, ompi_request_t** requests)
{
    int rc;
    size_t i;
    bool reuse_old_request = true;

    for(i=0; i<count; i++) {
        mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i];
        if(NULL == pml_request) {
            continue;
        }
        /* skip requests that do not belong to the PML (e.g. generalized
         * requests) */
        if (OMPI_REQUEST_PML != requests[i]->req_type) {
            continue;
        }

        /* If the persistent request is currently active - obtain the
         * request lock and verify the status is incomplete. If the
         * pml layer has not completed the request - mark the request
         * as free called - so that it will be freed when the request
         * completes - and create a new request.
         */

        reuse_old_request = true;
        switch(pml_request->req_ompi.req_state) {
        case OMPI_REQUEST_INACTIVE:
            if(pml_request->req_pml_complete == true)
                break;
            /* otherwise fall through */
        case OMPI_REQUEST_ACTIVE: {

            ompi_request_t *request;
            OPAL_THREAD_LOCK(&ompi_request_lock);
            if (pml_request->req_pml_complete == false) {
                /* free request after it completes */
                pml_request->req_free_called = true;
            } else {
                /* can reuse the existing request */
                OPAL_THREAD_UNLOCK(&ompi_request_lock);
                break;
            }

            reuse_old_request = false;
            /* allocate a new request with the same parameters as the old one */
            switch(pml_request->req_type) {
            case MCA_PML_REQUEST_SEND: {
                mca_pml_base_send_mode_t sendmode =
                    ((mca_pml_base_send_request_t*)pml_request)->req_send_mode;
                rc = mca_pml_csum_isend_init(
                    pml_request->req_addr,
                    pml_request->req_count,
                    pml_request->req_datatype,
                    pml_request->req_peer,
                    pml_request->req_tag,
                    sendmode,
                    pml_request->req_comm,
                    &request);
                break;
            }
            case MCA_PML_REQUEST_RECV:
                rc = mca_pml_csum_irecv_init(
                    pml_request->req_addr,
                    pml_request->req_count,
                    pml_request->req_datatype,
                    pml_request->req_peer,
                    pml_request->req_tag,
                    pml_request->req_comm,
                    &request);
                break;
            default:
                rc = OMPI_ERR_REQUEST;
                break;
            }
            OPAL_THREAD_UNLOCK(&ompi_request_lock);
            if(OMPI_SUCCESS != rc)
                return rc;
            /* hand the replacement request back to the caller */
            pml_request = (mca_pml_base_request_t*)request;
            requests[i] = request;
            break;
        }
        default:
            return OMPI_ERR_REQUEST;
        }

        /* start the request */
        switch(pml_request->req_type) {
        case MCA_PML_REQUEST_SEND:
        {
            mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)pml_request;
            MEMCHECKER(
                memchecker_call(&opal_memchecker_base_isdefined,
                                pml_request->req_addr, pml_request->req_count,
                                pml_request->req_datatype);
            );
            if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) {
                size_t offset = 0;
                /**
                 * Reset the convertor: when the original (reused) request
                 * completed, its convertor position was not reset.
                 */
                opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
                                             &offset );
            }
            MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc);
            if(rc != OMPI_SUCCESS)
                return rc;
            break;
        }
        case MCA_PML_REQUEST_RECV:
        {
            mca_pml_csum_recv_request_t* recvreq = (mca_pml_csum_recv_request_t*)pml_request;
            MCA_PML_CSUM_RECV_REQUEST_START(recvreq);
            break;
        }
        default:
            return OMPI_ERR_REQUEST;
        }
    }
    return OMPI_SUCCESS;
}
|
||||
|
@ -1 +0,0 @@
|
||||
DIRECT_CALL_HEADER="ompi/mca/pml/csum/pml_csum.h"
|
Загрузка…
x
Ссылка в новой задаче
Block a user