This commit was SVN r28133.
This commit is contained in:
Nathan Hjelm 2013-02-28 00:17:56 +00:00
parent 1370d4569a
commit b5a2cd1cce
27 changed files with 0 additions and 7459 deletions

View file

@@ -1,66 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
#
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
dist_pkgdata_DATA = \
help-mpi-pml-csum.txt
EXTRA_DIST = post_configure.sh pml_csum_endpoint.c pml_csum_endpoint.h
csum_sources = \
pml_csum.c \
pml_csum.h \
pml_csum_comm.c \
pml_csum_comm.h \
pml_csum_component.c \
pml_csum_component.h \
pml_csum_hdr.h \
pml_csum_iprobe.c \
pml_csum_irecv.c \
pml_csum_isend.c \
pml_csum_progress.c \
pml_csum_rdma.c \
pml_csum_rdma.h \
pml_csum_rdmafrag.c \
pml_csum_rdmafrag.h \
pml_csum_recvfrag.c \
pml_csum_recvfrag.h \
pml_csum_recvreq.c \
pml_csum_recvreq.h \
pml_csum_sendreq.c \
pml_csum_sendreq.h \
pml_csum_start.c
if MCA_BUILD_ompi_pml_csum_DSO
component_noinst =
component_install = mca_pml_csum.la
else
component_noinst = libmca_pml_csum.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_pml_csum_la_SOURCES = $(csum_sources)
mca_pml_csum_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_pml_csum_la_SOURCES = $(csum_sources)
libmca_pml_csum_la_LDFLAGS = -module -avoid-version

View file

@@ -1,20 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[eager_limit_too_small]
The "eager limit" MCA parameter in the %s BTL was set to a value which
is too low for Open MPI to function properly. Please re-run your job
with a higher eager limit value for this BTL; the exact MCA parameter
name and its corresponding minimum value is shown below.
Local host: %s
BTL name: %s
BTL eager limit value: %d (set via btl_%s_eager_limit)
BTL eager limit minimum: %d
MCA parameter name: btl_%s_eager_limit

View file

@@ -1,903 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdlib.h>
#include <string.h>
#include "opal/class/opal_bitmap.h"
#include "opal/util/crc.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/bml/base/base.h"
#include "ompi/runtime/ompi_cr.h"
#include "pml_csum.h"
#include "pml_csum_component.h"
#include "pml_csum_comm.h"
#include "pml_csum_hdr.h"
#include "pml_csum_recvfrag.h"
#include "pml_csum_sendreq.h"
#include "pml_csum_recvreq.h"
#include "pml_csum_rdmafrag.h"
mca_pml_csum_t mca_pml_csum = {
{
mca_pml_csum_add_procs,
mca_pml_csum_del_procs,
mca_pml_csum_enable,
mca_pml_csum_progress,
mca_pml_csum_add_comm,
mca_pml_csum_del_comm,
mca_pml_csum_irecv_init,
mca_pml_csum_irecv,
mca_pml_csum_recv,
mca_pml_csum_isend_init,
mca_pml_csum_isend,
mca_pml_csum_send,
mca_pml_csum_iprobe,
mca_pml_csum_probe,
mca_pml_csum_start,
mca_pml_csum_improbe,
mca_pml_csum_mprobe,
mca_pml_csum_imrecv,
mca_pml_csum_mrecv,
mca_pml_csum_dump,
mca_pml_csum_ft_event,
65535, /* max context id (hdr_ctx is a uint16_t) */
INT_MAX /* max tag */
}
};
void mca_pml_csum_error_handler( struct mca_btl_base_module_t* btl,
int32_t flags, ompi_proc_t* errproc,
char* btlinfo );
int mca_pml_csum_enable(bool enable)
{
if( false == enable ) {
return OMPI_SUCCESS;
}
OBJ_CONSTRUCT(&mca_pml_csum.lock, opal_mutex_t);
/* fragments */
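/* ompi_free_list_init_new arguments, in order: list, element size, element
   alignment, element class, payload size, payload alignment, initial count,
   max count, grow increment, mpool (NULL here) */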
OBJ_CONSTRUCT(&mca_pml_csum.rdma_frags, ompi_free_list_t);
ompi_free_list_init_new( &mca_pml_csum.rdma_frags,
sizeof(mca_pml_csum_rdma_frag_t),
opal_cache_line_size,
OBJ_CLASS(mca_pml_csum_rdma_frag_t),
0,opal_cache_line_size,
mca_pml_csum.free_list_num,
mca_pml_csum.free_list_max,
mca_pml_csum.free_list_inc,
NULL );
OBJ_CONSTRUCT(&mca_pml_csum.recv_frags, ompi_free_list_t);
ompi_free_list_init_new( &mca_pml_csum.recv_frags,
sizeof(mca_pml_csum_recv_frag_t) + mca_pml_csum.unexpected_limit,
opal_cache_line_size,
OBJ_CLASS(mca_pml_csum_recv_frag_t),
0,opal_cache_line_size,
mca_pml_csum.free_list_num,
mca_pml_csum.free_list_max,
mca_pml_csum.free_list_inc,
NULL );
OBJ_CONSTRUCT(&mca_pml_csum.pending_pckts, ompi_free_list_t);
ompi_free_list_init_new( &mca_pml_csum.pending_pckts,
sizeof(mca_pml_csum_pckt_pending_t),
opal_cache_line_size,
OBJ_CLASS(mca_pml_csum_pckt_pending_t),
0,opal_cache_line_size,
mca_pml_csum.free_list_num,
mca_pml_csum.free_list_max,
mca_pml_csum.free_list_inc,
NULL );
OBJ_CONSTRUCT(&mca_pml_csum.buffers, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_pml_csum.send_ranges, ompi_free_list_t);
ompi_free_list_init_new( &mca_pml_csum.send_ranges,
sizeof(mca_pml_csum_send_range_t) +
(mca_pml_csum.max_send_per_range - 1) * sizeof(mca_pml_csum_com_btl_t),
opal_cache_line_size,
OBJ_CLASS(mca_pml_csum_send_range_t),
0,opal_cache_line_size,
mca_pml_csum.free_list_num,
mca_pml_csum.free_list_max,
mca_pml_csum.free_list_inc,
NULL );
/* pending operations */
OBJ_CONSTRUCT(&mca_pml_csum.send_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_csum.recv_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_csum.pckt_pending, opal_list_t);
OBJ_CONSTRUCT(&mca_pml_csum.rdma_pending, opal_list_t);
/* missing communicator pending list */
OBJ_CONSTRUCT(&mca_pml_csum.non_existing_communicator_pending, opal_list_t);
/**
* If we get here, this is the PML that was selected for the run. We
* should take ownership of the send and receive request lists and
* initialize them with the size of our own requests.
*/
ompi_free_list_init_new( &mca_pml_base_send_requests,
sizeof(mca_pml_csum_send_request_t) +
(mca_pml_csum.max_rdma_per_request - 1) *
sizeof(mca_pml_csum_com_btl_t),
opal_cache_line_size,
OBJ_CLASS(mca_pml_csum_send_request_t),
0,opal_cache_line_size,
mca_pml_csum.free_list_num,
mca_pml_csum.free_list_max,
mca_pml_csum.free_list_inc,
NULL );
ompi_free_list_init_new( &mca_pml_base_recv_requests,
sizeof(mca_pml_csum_recv_request_t) +
(mca_pml_csum.max_rdma_per_request - 1) *
sizeof(mca_pml_csum_com_btl_t),
opal_cache_line_size,
OBJ_CLASS(mca_pml_csum_recv_request_t),
0,opal_cache_line_size,
mca_pml_csum.free_list_num,
mca_pml_csum.free_list_max,
mca_pml_csum.free_list_inc,
NULL );
mca_pml_csum.enabled = true;
return OMPI_SUCCESS;
}
int mca_pml_csum_add_comm(ompi_communicator_t* comm)
{
/* allocate pml specific comm data */
mca_pml_csum_comm_t* pml_comm = OBJ_NEW(mca_pml_csum_comm_t);
opal_list_item_t *item, *next_item;
mca_pml_csum_recv_frag_t* frag;
mca_pml_csum_comm_proc_t* pml_proc;
mca_pml_csum_match_hdr_t* hdr;
int i;
if (NULL == pml_comm) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* should never happen, but it has happened, so check */
if (comm->c_contextid > mca_pml_csum.super.pml_max_contextid) {
OBJ_RELEASE(pml_comm);
return OMPI_ERR_OUT_OF_RESOURCE;
}
mca_pml_csum_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
comm->c_pml_comm = pml_comm;
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
pml_comm->procs[i].ompi_proc = ompi_group_peer_lookup(comm->c_remote_group,i);
OBJ_RETAIN(pml_comm->procs[i].ompi_proc);
}
/* Grab all related messages from the non_existing_communicator pending queue */
for( item = opal_list_get_first(&mca_pml_csum.non_existing_communicator_pending);
item != opal_list_get_end(&mca_pml_csum.non_existing_communicator_pending);
item = next_item ) {
frag = (mca_pml_csum_recv_frag_t*)item;
next_item = opal_list_get_next(item);
hdr = &frag->hdr.hdr_match;
/* Is this fragment for the current communicator ? */
if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid )
continue;
/* As we now know we work on a fragment for this communicator
* we should remove it from the
* non_existing_communicator_pending list. */
opal_list_remove_item( &mca_pml_csum.non_existing_communicator_pending,
item );
add_fragment_to_unexpected:
/* We generate the MSG_ARRIVED event as soon as the PML is aware
* of a matching fragment arrival, whether or not it is received
* in the correct order. This allows tools to figure out when
* messages are not received in the correct order (e.g., when
* multiple network interfaces are in use).
*/
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
/* There is no matching to be done, and no lock to be held on the communicator as
* we know at this point that the communicator has not yet been returned to the user.
* The only required protection is around the non_existing_communicator_pending queue.
* We just have to push the fragment into the unexpected list of the corresponding
* proc, or into the out-of-order (cant_match) list.
*/
pml_proc = &(pml_comm->procs[hdr->hdr_src]);
if( ((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) {
/* We're now expecting the next sequence number. */
pml_proc->expected_sequence++;
opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag );
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
/* And now the ugly part. As some fragments can be inserted in the cant_match list,
* every time we successfully add a fragment to the unexpected list we have to make
* sure the next one is not in cant_match. Otherwise, we will end up in a deadlock
* situation, as cant_match is only checked when a new fragment is received from
* the network.
*/
for(frag = (mca_pml_csum_recv_frag_t *)opal_list_get_first(&pml_proc->frags_cant_match);
frag != (mca_pml_csum_recv_frag_t *)opal_list_get_end(&pml_proc->frags_cant_match);
frag = (mca_pml_csum_recv_frag_t *)opal_list_get_next(frag)) {
hdr = &frag->hdr.hdr_match;
/* If the message has the next expected seq from that proc... */
if(hdr->hdr_seq != pml_proc->expected_sequence)
continue;
opal_list_remove_item(&pml_proc->frags_cant_match, (opal_list_item_t*)frag);
goto add_fragment_to_unexpected;
}
} else {
opal_list_append( &pml_proc->frags_cant_match, (opal_list_item_t*)frag );
}
}
return OMPI_SUCCESS;
}
int mca_pml_csum_del_comm(ompi_communicator_t* comm)
{
mca_pml_csum_comm_t* pml_comm = comm->c_pml_comm;
int i;
for( i = 0; i < comm->c_remote_group->grp_proc_count; i++ ) {
OBJ_RELEASE(pml_comm->procs[i].ompi_proc);
}
OBJ_RELEASE(comm->c_pml_comm);
comm->c_pml_comm = NULL;
return OMPI_SUCCESS;
}
/*
* For each proc, set up a data structure that indicates the BTLs
* that can be used to reach the destination.
*/
int mca_pml_csum_add_procs(ompi_proc_t** procs, size_t nprocs)
{
opal_bitmap_t reachable;
int rc;
size_t i;
opal_list_item_t *item;
opal_convertor_t *local_convertor;
if(nprocs == 0)
return OMPI_SUCCESS;
/* Create a convertor for processes on the same node &
disable checksum computation for local communication */
local_convertor = opal_convertor_create(ompi_proc_local()->proc_arch, 0);
local_convertor->flags &= ~CONVERTOR_WITH_CHECKSUM;
for (i = 0 ; i < nprocs ; ++i) {
/* we don't have any endpoint data we need to cache on the
ompi_proc_t, so set proc_pml to NULL */
procs[i]->proc_pml = NULL;
/* if the proc isn't local, tell the convertor to
* checksum the data
*/
if (!OPAL_PROC_ON_LOCAL_NODE(procs[i]->proc_flags)) {
procs[i]->proc_convertor->flags |= CONVERTOR_WITH_CHECKSUM;
} else {
OBJ_RELEASE(procs[i]->proc_convertor);
procs[i]->proc_convertor = local_convertor;
OBJ_RETAIN(local_convertor);
}
}
/* Release our creation reference; each local proc retained its own reference above */
OBJ_RELEASE(local_convertor);
OBJ_CONSTRUCT(&reachable, opal_bitmap_t);
rc = opal_bitmap_init(&reachable, (int)nprocs);
if(OMPI_SUCCESS != rc)
return rc;
/*
* JJH: Disable this in FT enabled builds since
* we use a wrapper PML. It will cause this check to
* return failure as all processes will return the wrapper PML
* component in use instead of the wrapped PML component underneath.
*/
#if OPAL_ENABLE_FT_CR == 0
/* make sure remote procs are using the same PML as us */
if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("csum",
procs,
nprocs))) {
return rc;
}
#endif
rc = mca_bml.bml_add_procs( nprocs,
procs,
&reachable );
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
/* Check that values supplied by all initialized btls will work
for us. Note that this is the list of all initialized BTLs,
not the ones used for the just added procs. This is a little
overkill and inaccurate, as we may end up not using the BTL in
question and all add_procs calls after the first one are
duplicating an already completed check. But the final
initialization of the PML occurs before the final
initialization of the BTLs, and iterating through the in-use
BTLs requires iterating over the procs, as the BML does not
expose all currently in use btls. */
for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ;
item != opal_list_get_end(&mca_btl_base_modules_initialized) ;
item = opal_list_get_next(item)) {
mca_btl_base_selected_module_t *sm =
(mca_btl_base_selected_module_t*) item;
if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_csum_hdr_t)) {
opal_show_help("help-mpi-pml-csum.txt", "eager_limit_too_small",
true,
sm->btl_component->btl_version.mca_component_name,
ompi_process_info.nodename,
sm->btl_component->btl_version.mca_component_name,
sm->btl_module->btl_eager_limit,
sm->btl_component->btl_version.mca_component_name,
sizeof(mca_pml_csum_hdr_t),
sm->btl_component->btl_version.mca_component_name);
rc = OMPI_ERR_BAD_PARAM;
goto cleanup_and_return;
}
}
/* TODO: Move these callback registration to another place */
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_MATCH,
mca_pml_csum_recv_frag_callback_match,
NULL );
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_RNDV,
mca_pml_csum_recv_frag_callback_rndv,
NULL );
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_RGET,
mca_pml_csum_recv_frag_callback_rget,
NULL );
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_ACK,
mca_pml_csum_recv_frag_callback_ack,
NULL );
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_FRAG,
mca_pml_csum_recv_frag_callback_frag,
NULL );
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_PUT,
mca_pml_csum_recv_frag_callback_put,
NULL );
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_FIN,
mca_pml_csum_recv_frag_callback_fin,
NULL );
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
/* register error handlers */
rc = mca_bml.bml_register_error(mca_pml_csum_error_handler);
if(OMPI_SUCCESS != rc)
goto cleanup_and_return;
cleanup_and_return:
OBJ_DESTRUCT(&reachable);
return rc;
}
/*
* iterate through each proc and notify any BTLs associated
* with the proc that it is/has gone away
*/
int mca_pml_csum_del_procs(ompi_proc_t** procs, size_t nprocs)
{
return mca_bml.bml_del_procs(nprocs, procs);
}
/*
* diagnostics
*/
int mca_pml_csum_dump(struct ompi_communicator_t* comm, int verbose)
{
struct mca_pml_comm_t* pml_comm = comm->c_pml_comm;
int i;
/* iterate through all procs on communicator */
for( i = 0; i < (int)pml_comm->num_procs; i++ ) {
mca_pml_csum_comm_proc_t* proc = &pml_comm->procs[i];
mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->ompi_proc->proc_bml;
size_t n;
opal_output(0, "[Rank %d]\n", i);
/* dump all receive queues */
/* dump all btls */
for(n=0; n<ep->btl_eager.arr_size; n++) {
mca_bml_base_btl_t* bml_btl = &ep->btl_eager.bml_btls[n];
bml_btl->btl->btl_dump(bml_btl->btl, bml_btl->btl_endpoint, verbose);
}
}
return OMPI_SUCCESS;
}
static void mca_pml_csum_fin_completion( mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status )
{
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
/* check for pending requests */
MCA_PML_CSUM_PROGRESS_PENDING(bml_btl);
}
/**
* Send a FIN to the peer. If we fail to send it (no more fragments
* available, or the send failed), this function automatically adds the FIN
* to the list of pending FINs, which guarantees that the FIN will be sent
* later.
*/
int mca_pml_csum_send_fin( ompi_proc_t* proc,
mca_bml_base_btl_t* bml_btl,
ompi_ptr_t hdr_des,
uint8_t order,
uint32_t status )
{
mca_btl_base_descriptor_t* fin;
mca_pml_csum_fin_hdr_t* hdr;
int rc;
mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_csum_fin_hdr_t),
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if(NULL == fin) {
MCA_PML_CSUM_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
return OMPI_ERR_OUT_OF_RESOURCE;
}
fin->des_cbfunc = mca_pml_csum_fin_completion;
fin->des_cbdata = NULL;
/* fill in header */
hdr = (mca_pml_csum_fin_hdr_t*)fin->des_src->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0;
hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_FIN;
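/* zero the checksum field first: opal_csum16 below is computed over the
   whole header, including this field */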
hdr->hdr_common.hdr_csum = 0;
hdr->hdr_des = hdr_des;
hdr->hdr_fail = status;
hdr->hdr_common.hdr_csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t));
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s: Sending \'FIN\' with header csum:0x%04x\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_common.hdr_csum));
csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_FIN, proc);
/* queue request */
rc = mca_bml_base_send( bml_btl,
fin,
MCA_PML_CSUM_HDR_TYPE_FIN );
if( OPAL_LIKELY( rc >= 0 ) ) {
if( OPAL_LIKELY( 1 == rc ) ) {
MCA_PML_CSUM_PROGRESS_PENDING(bml_btl);
}
return OMPI_SUCCESS;
}
mca_bml_base_free(bml_btl, fin);
MCA_PML_CSUM_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
return OMPI_ERR_OUT_OF_RESOURCE;
}
void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl)
{
mca_pml_csum_pckt_pending_t *pckt;
int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_csum.pckt_pending);
for(i = 0; i < s; i++) {
mca_bml_base_btl_t *send_dst = NULL;
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
pckt = (mca_pml_csum_pckt_pending_t*)
opal_list_remove_first(&mca_pml_csum.pckt_pending);
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
if(NULL == pckt)
break;
if(pckt->bml_btl != NULL &&
pckt->bml_btl->btl == bml_btl->btl) {
send_dst = pckt->bml_btl;
} else {
send_dst = mca_bml_base_btl_array_find(
&pckt->proc->proc_bml->btl_eager, bml_btl->btl);
}
if(NULL == send_dst) {
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
opal_list_append(&mca_pml_csum.pckt_pending,
(opal_list_item_t*)pckt);
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
continue;
}
switch(pckt->hdr.hdr_common.hdr_type) {
case MCA_PML_CSUM_HDR_TYPE_ACK:
rc = mca_pml_csum_recv_request_ack_send_btl(pckt->proc,
send_dst,
pckt->hdr.hdr_ack.hdr_src_req.lval,
pckt->hdr.hdr_ack.hdr_dst_req.pval,
pckt->hdr.hdr_ack.hdr_send_offset,
pckt->hdr.hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA);
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
opal_list_append(&mca_pml_csum.pckt_pending,
(opal_list_item_t*)pckt);
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
return;
}
break;
case MCA_PML_CSUM_HDR_TYPE_FIN:
rc = mca_pml_csum_send_fin(pckt->proc, send_dst,
pckt->hdr.hdr_fin.hdr_des,
pckt->order,
pckt->hdr.hdr_fin.hdr_fail);
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
return;
}
break;
default:
opal_output(0, "[%s:%d] wrong header type\n",
__FILE__, __LINE__);
break;
}
/* We're done with this packet, return it back to the free list */
MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt);
}
}
void mca_pml_csum_process_pending_rdma(void)
{
mca_pml_csum_rdma_frag_t* frag;
int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_csum.rdma_pending);
for(i = 0; i < s; i++) {
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
frag = (mca_pml_csum_rdma_frag_t*)
opal_list_remove_first(&mca_pml_csum.rdma_pending);
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
if(NULL == frag)
break;
if(frag->rdma_state == MCA_PML_CSUM_RDMA_PUT) {
frag->retries++;
rc = mca_pml_csum_send_request_put_frag(frag);
} else {
rc = mca_pml_csum_recv_request_get_frag(frag);
}
if(OMPI_ERR_OUT_OF_RESOURCE == rc)
break;
}
}
void mca_pml_csum_error_handler(
struct mca_btl_base_module_t* btl, int32_t flags,
ompi_proc_t* errproc, char* btlinfo ) {
ompi_rte_abort(-1, NULL);
}
#if OPAL_ENABLE_FT_CR == 0
int mca_pml_csum_ft_event( int state ) {
return OMPI_SUCCESS;
}
#else
int mca_pml_csum_ft_event( int state )
{
static bool first_continue_pass = false;
ompi_proc_t** procs = NULL;
size_t num_procs;
int ret, p;
ompi_rte_collective_t *coll, *modex;
coll = OBJ_NEW(ompi_rte_collective_t);
coll->id = ompi_process_info.peer_init_barrier;
if(OPAL_CRS_CHECKPOINT == state) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1);
ompi_rte_barrier(coll);
ORTE_WAIT_FOR_COMPLETION(coll->active);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0);
}
else if(OPAL_CRS_CONTINUE == state) {
first_continue_pass = !first_continue_pass;
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0);
ompi_rte_barrier(coll);
ORTE_WAIT_FOR_COMPLETION(coll->active);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2);
}
if( orte_cr_continue_like_restart && !first_continue_pass ) {
/*
* Get a list of processes
*/
procs = ompi_proc_all(&num_procs);
if(NULL == procs) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto clean;
}
/*
* Refresh the proc structure, and publish our proc info in the modex.
* NOTE: Do *not* call ompi_proc_finalize, as there are many places in
* the code that point to individual procs in this structure. For our
* needs here we only need to fix up the modex, bml, and pml
* references.
*/
if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) {
opal_output(0,
"pml:csum: ft_event(Restart): proc_refresh Failed %d",
ret);
for(p = 0; p < (int)num_procs; ++p) {
OBJ_RELEASE(procs[p]);
}
free (procs);
goto clean;
}
}
}
else if(OPAL_CRS_RESTART_PRE == state ) {
/* Nothing here */
}
else if(OPAL_CRS_RESTART == state ) {
/*
* Get a list of processes
*/
procs = ompi_proc_all(&num_procs);
if(NULL == procs) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto clean;
}
/*
* Clean out the modex information since it is invalid now.
* ompi_rte_purge_proc_attrs();
* This happens at the ORTE level, so doing it again here will cause
* some issues with socket caching.
*/
/*
* Refresh the proc structure, and publish our proc info in the modex.
* NOTE: Do *not* call ompi_proc_finalize, as there are many places in
* the code that point to individual procs in this structure. For our
* needs here we only need to fix up the modex, bml, and pml
* references.
*/
if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) {
opal_output(0,
"pml:csum: ft_event(Restart): proc_refresh Failed %d",
ret);
for(p = 0; p < (int)num_procs; ++p) {
OBJ_RELEASE(procs[p]);
}
free (procs);
goto clean;
}
}
else if(OPAL_CRS_TERM == state ) {
;
}
else {
;
}
/* Call the BML
* BML is expected to call ft_event in
* - BTL(s)
* - MPool(s)
*/
if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) {
opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n",
ret);
}
if(OPAL_CRS_CHECKPOINT == state) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1);
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0);
/* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/
}
}
else if(OPAL_CRS_CONTINUE == state) {
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1);
ompi_rte_barrier(coll);
ORTE_WAIT_FOR_COMPLETION(coll->active);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3);
}
if( orte_cr_continue_like_restart && !first_continue_pass ) {
/*
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
modex = OBJ_NEW(ompi_rte_collective_t);
modex->id = ompi_process_info.peer_modex;
if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) {
opal_output(0,
"pml:csum: ft_event(Restart): Failed orte_grpcomm.modex() = %d",
ret);
OBJ_RELEASE(modex);
goto clean;
}
ORTE_WAIT_FOR_COMPLETION(modex->active);
OBJ_RELEASE(modex);
/*
* Startup the PML stack now that the modex is running again
* Add the new procs (BTLs redo modex recv's)
*/
if( OMPI_SUCCESS != (ret = mca_pml_csum_add_procs(procs, num_procs) ) ) {
opal_output(0, "pml:csum: ft_event(Restart): Failed in add_procs (%d)", ret);
goto clean;
}
/* Is this barrier necessary ? JJH */
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
opal_output(0, "pml:csum: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
goto clean;
}
ORTE_WAIT_FOR_COMPLETION(coll->active);
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {
OBJ_RELEASE(procs[p]);
}
free(procs);
procs = NULL;
}
}
if( !first_continue_pass ) {
if( opal_cr_timing_barrier_enabled ) {
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2);
ompi_rte_barrier(coll);
ORTE_WAIT_FOR_COMPLETION(coll->active);
}
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1);
}
}
else if(OPAL_CRS_RESTART_PRE == state ) {
/* Nothing here */
}
else if(OPAL_CRS_RESTART == state ) {
/*
* Exchange the modex information once again.
* BTLs will have republished their modex information.
*/
modex = OBJ_NEW(ompi_rte_collective_t);
modex->id = ompi_process_info.peer_modex;
if (OMPI_SUCCESS != (ret = orte_grpcomm.modex(modex))) {
opal_output(0,
"pml:csum: ft_event(Restart): Failed orte_grpcomm.modex() = %d",
ret);
OBJ_RELEASE(modex);
goto clean;
}
ORTE_WAIT_FOR_COMPLETION(modex->active);
OBJ_RELEASE(modex);
/*
* Startup the PML stack now that the modex is running again
* Add the new procs (BTLs redo modex recv's)
*/
if( OMPI_SUCCESS != (ret = mca_pml_csum_add_procs(procs, num_procs) ) ) {
opal_output(0, "pml:csum: ft_event(Restart): Failed in add_procs (%d)", ret);
goto clean;
}
/* Is this barrier necessary ? JJH */
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
opal_output(0, "pml:csum: ft_event(Restart): Failed in ompi_rte_barrier (%d)", ret);
goto clean;
}
ORTE_WAIT_FOR_COMPLETION(coll->active);
if( NULL != procs ) {
for(p = 0; p < (int)num_procs; ++p) {
OBJ_RELEASE(procs[p]);
}
free(procs);
procs = NULL;
}
}
else if(OPAL_CRS_TERM == state ) {
;
}
else {
;
}
ret = OMPI_SUCCESS;
clean:
OBJ_RELEASE(coll);
return ret;
}
#endif /* OPAL_ENABLE_FT_CR */
int mca_pml_csum_com_btl_comp(const void *v1, const void *v2)
{
const mca_pml_csum_com_btl_t *b1 = (const mca_pml_csum_com_btl_t *) v1;
const mca_pml_csum_com_btl_t *b2 = (const mca_pml_csum_com_btl_t *) v2;
if(b1->bml_btl->btl_weight < b2->bml_btl->btl_weight)
return 1;
if(b1->bml_btl->btl_weight > b2->bml_btl->btl_weight)
return -1;
return 0;
}
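
The expected_sequence/frags_cant_match bookkeeping in mca_pml_csum_add_comm above is the trickiest part of this file. Below is a minimal standalone sketch of the same in-order delivery discipline; it uses a plain array in place of the opal_list_t queues and a hypothetical deliver() helper, so it illustrates the control flow only, not the real data structures.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define MAX_PARKED 16

static uint16_t expected_sequence = 1;  /* next sequence number we can deliver */
static uint16_t parked[MAX_PARKED];     /* stands in for frags_cant_match */
static int      parked_count = 0;

static void deliver(uint16_t seq)
{
    printf("delivered fragment %u\n", seq);
}

/* Handle an arriving fragment: deliver it if it is the expected one,
 * otherwise park it, then drain any parked fragments that have become
 * deliverable (the role of the goto loop in mca_pml_csum_add_comm). */
static void handle_fragment(uint16_t seq)
{
    if (seq != expected_sequence) {
        parked[parked_count++] = seq;   /* out of order: cannot match yet */
        return;
    }
    deliver(seq);
    expected_sequence++;
    bool found = true;
    while (found) {
        found = false;
        for (int i = 0; i < parked_count; i++) {
            if (parked[i] == expected_sequence) {
                deliver(parked[i]);
                expected_sequence++;
                parked[i] = parked[--parked_count];  /* swap-remove */
                found = true;
                break;
            }
        }
    }
}

int main(void)
{
    uint16_t arrivals[] = { 2, 3, 1, 5, 4 };  /* out-of-order arrival pattern */
    for (int i = 0; i < 5; i++) {
        handle_fragment(arrivals[i]);
    }
    return 0;  /* prints fragments 1..5 in order */
}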

View file

@@ -1,361 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_CSUM_H
#define MCA_PML_CSUM_H
#include "ompi_config.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/request/request.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/pml_base_request.h"
#include "ompi/mca/pml/base/pml_base_bsend.h"
#include "ompi/mca/pml/base/pml_base_sendreq.h"
#include "ompi/datatype/ompi_datatype.h"
#include "pml_csum_hdr.h"
#include "ompi/mca/bml/base/base.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/allocator/base/base.h"
BEGIN_C_DECLS
/**
* CSUM PML module
*/
struct mca_pml_csum_t {
mca_pml_base_module_t super;
int priority;
int free_list_num; /* initial size of free list */
int free_list_max; /* maximum size of free list */
int free_list_inc; /* number of elements to grow free list */
size_t send_pipeline_depth;
size_t recv_pipeline_depth;
size_t rdma_put_retries_limit;
int max_rdma_per_request;
int max_send_per_range;
bool leave_pinned;
int leave_pinned_pipeline;
/* lock queue access */
opal_mutex_t lock;
/* free lists */
ompi_free_list_t rdma_frags;
ompi_free_list_t recv_frags;
ompi_free_list_t pending_pckts;
ompi_free_list_t buffers;
ompi_free_list_t send_ranges;
/* list of pending operations */
opal_list_t pckt_pending;
opal_list_t send_pending;
opal_list_t recv_pending;
opal_list_t rdma_pending;
/* List of pending fragments without a matching communicator */
opal_list_t non_existing_communicator_pending;
bool enabled;
char* allocator_name;
mca_allocator_base_module_t* allocator;
uint32_t unexpected_limit;
};
typedef struct mca_pml_csum_t mca_pml_csum_t;
extern mca_pml_csum_t mca_pml_csum;
extern int mca_pml_csum_output;
/*
* PML interface functions.
*/
extern int mca_pml_csum_add_comm(
struct ompi_communicator_t* comm
);
extern int mca_pml_csum_del_comm(
struct ompi_communicator_t* comm
);
extern int mca_pml_csum_add_procs(
struct ompi_proc_t **procs,
size_t nprocs
);
extern int mca_pml_csum_del_procs(
struct ompi_proc_t **procs,
size_t nprocs
);
extern int mca_pml_csum_enable( bool enable );
extern int mca_pml_csum_progress(void);
extern int mca_pml_csum_iprobe( int dst,
int tag,
struct ompi_communicator_t* comm,
int *matched,
ompi_status_public_t* status );
extern int mca_pml_csum_probe( int dst,
int tag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status );
extern int mca_pml_csum_improbe( int dst,
int tag,
struct ompi_communicator_t* comm,
int *matched,
struct ompi_message_t **message,
ompi_status_public_t* status );
extern int mca_pml_csum_mprobe( int dst,
int tag,
struct ompi_communicator_t* comm,
struct ompi_message_t **message,
ompi_status_public_t* status );
extern int mca_pml_csum_isend_init( void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request );
extern int mca_pml_csum_isend( void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request );
extern int mca_pml_csum_send( void *buf,
size_t count,
ompi_datatype_t *datatype,
int dst,
int tag,
mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm );
extern int mca_pml_csum_irecv_init( void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_request_t **request );
extern int mca_pml_csum_irecv( void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
struct ompi_request_t **request );
extern int mca_pml_csum_recv( void *buf,
size_t count,
ompi_datatype_t *datatype,
int src,
int tag,
struct ompi_communicator_t* comm,
ompi_status_public_t* status );
extern int mca_pml_csum_imrecv( void *buf,
size_t count,
ompi_datatype_t *datatype,
struct ompi_message_t **message,
struct ompi_request_t **request );
extern int mca_pml_csum_mrecv( void *buf,
size_t count,
ompi_datatype_t *datatype,
struct ompi_message_t **message,
ompi_status_public_t* status );
extern int mca_pml_csum_dump( struct ompi_communicator_t* comm,
int verbose );
extern int mca_pml_csum_start( size_t count,
ompi_request_t** requests );
extern int mca_pml_csum_ft_event( int state );
END_C_DECLS
struct mca_pml_csum_pckt_pending_t {
ompi_free_list_item_t super;
ompi_proc_t* proc;
mca_pml_csum_hdr_t hdr;
struct mca_bml_base_btl_t *bml_btl;
uint8_t order;
};
typedef struct mca_pml_csum_pckt_pending_t mca_pml_csum_pckt_pending_t;
OBJ_CLASS_DECLARATION(mca_pml_csum_pckt_pending_t);
#define MCA_PML_CSUM_PCKT_PENDING_ALLOC(pckt,rc) \
do { \
ompi_free_list_item_t* item; \
OMPI_FREE_LIST_WAIT(&mca_pml_csum.pending_pckts, item, rc); \
pckt = (mca_pml_csum_pckt_pending_t*)item; \
} while (0)
#define MCA_PML_CSUM_PCKT_PENDING_RETURN(pckt) \
do { \
/* return packet */ \
OMPI_FREE_LIST_RETURN(&mca_pml_csum.pending_pckts, \
(ompi_free_list_item_t*)pckt); \
} while(0)
#define MCA_PML_CSUM_ADD_FIN_TO_PENDING(P, D, B, O, S) \
do { \
mca_pml_csum_pckt_pending_t *_pckt; \
int _rc; \
\
MCA_PML_CSUM_PCKT_PENDING_ALLOC(_pckt,_rc); \
_pckt->hdr.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_FIN; \
_pckt->hdr.hdr_fin.hdr_des = (D); \
_pckt->hdr.hdr_fin.hdr_fail = (S); \
_pckt->proc = (P); \
_pckt->bml_btl = (B); \
_pckt->order = (O); \
OPAL_THREAD_LOCK(&mca_pml_csum.lock); \
opal_list_append(&mca_pml_csum.pckt_pending, \
(opal_list_item_t*)_pckt); \
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); \
} while(0)
int mca_pml_csum_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
ompi_ptr_t hdr_des, uint8_t order, uint32_t status);
/* This function tries to resend FIN/ACK packets from the pckt_pending queue.
* Packets are added to the queue when sending a FIN or ACK fails due to
* resource unavailability. The bml_btl passed to the function doesn't represent
* the packet's destination; it represents the BTL on which a resource was freed,
* so only this BTL should be considered for resending packets. */
void mca_pml_csum_process_pending_packets(mca_bml_base_btl_t* bml_btl);
/* This function retries failed PUT/GET operations on a frag. When an RDMA
* operation cannot be completed for some reason, the frag is put on the
* rdma_pending list and the operation is retried later. The destination of
* the RDMA operation is stored inside the frag structure. */
void mca_pml_csum_process_pending_rdma(void);
#define MCA_PML_CSUM_PROGRESS_PENDING(bml_btl) \
do { \
if(opal_list_get_size(&mca_pml_csum.pckt_pending)) \
mca_pml_csum_process_pending_packets(bml_btl); \
if(opal_list_get_size(&mca_pml_csum.recv_pending)) \
mca_pml_csum_recv_request_process_pending(); \
if(opal_list_get_size(&mca_pml_csum.send_pending)) \
mca_pml_csum_send_request_process_pending(bml_btl); \
if(opal_list_get_size(&mca_pml_csum.rdma_pending)) \
mca_pml_csum_process_pending_rdma(); \
} while (0)
/*
* Compute the total number of bytes across the supplied segments,
* excluding hdrlen bytes of header
*/
static inline int mca_pml_csum_compute_segment_length (size_t seg_size, void *segments, size_t count,
size_t hdrlen) {
size_t i, length;
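/* start the sum at -hdrlen (which wraps as an unsigned size_t) so the
   header bytes in the first segment are excluded from the returned total */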
for (i = 0, length = -hdrlen ; i < count ; ++i) {
mca_btl_base_segment_t *segment =
(mca_btl_base_segment_t *)((char *) segments + i * seg_size);
length += segment->seg_len;
}
return length;
}
static inline int mca_pml_csum_compute_segment_length_base (mca_btl_base_segment_t *segments,
size_t count, size_t hdrlen) {
size_t i, length;
for (i = 0, length = -hdrlen ; i < count ; ++i) {
length += segments[i].seg_len;
}
return length;
}
/* represents a BTL chosen for sending a request */
struct mca_pml_csum_com_btl_t {
mca_bml_base_btl_t *bml_btl;
struct mca_mpool_base_registration_t* btl_reg;
size_t length;
};
typedef struct mca_pml_csum_com_btl_t mca_pml_csum_com_btl_t;
int mca_pml_csum_com_btl_comp(const void *v1, const void *v2);
/* Calculate what percentage of a message to send through each BTL according to
* relative weight */
static inline void
mca_pml_csum_calc_weighted_length( mca_pml_csum_com_btl_t *btls, int num_btls, size_t size,
double weight_total )
{
int i;
size_t length_left;
/* shortcut for common case for only one BTL */
if( OPAL_LIKELY(1 == num_btls) ) {
btls[0].length = size;
return;
}
/* sort BTLs according to their weights so that BTLs with smaller weights
* will not hijack all of the traffic */
qsort( btls, num_btls, sizeof(mca_pml_csum_com_btl_t),
mca_pml_csum_com_btl_comp );
for(length_left = size, i = 0; i < num_btls; i++) {
mca_bml_base_btl_t* bml_btl = btls[i].bml_btl;
size_t length = 0;
if( OPAL_UNLIKELY(0 != length_left) ) {
length = (length_left > bml_btl->btl->btl_eager_limit)?
((size_t)(size * (bml_btl->btl_weight / weight_total))) :
length_left;
if(length > length_left)
length = length_left;
length_left -= length;
}
btls[i].length = length;
}
/* account for rounding errors */
btls[0].length += length_left;
}
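/*
 * Worked example of the split above (illustrative numbers only): size = 1000
 * bytes over two BTLs with btl_weight 2.0/3 and 1.0/3 (weight_total = 1.0)
 * and a 64-byte eager limit. Both remainders exceed the eager limit, so each
 * BTL gets (size_t)(size * weight): 666 and 333 bytes, leaving
 * length_left = 1. The final adjustment adds that leftover byte to btls[0],
 * so the per-BTL lengths always sum to the full message size.
 */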
#endif

View file

@@ -1,98 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "pml_csum.h"
#include "pml_csum_comm.h"
static void mca_pml_csum_comm_proc_construct(mca_pml_csum_comm_proc_t* proc)
{
proc->expected_sequence = 1;
proc->ompi_proc = NULL;
proc->send_sequence = 0;
OBJ_CONSTRUCT(&proc->frags_cant_match, opal_list_t);
OBJ_CONSTRUCT(&proc->specific_receives, opal_list_t);
OBJ_CONSTRUCT(&proc->unexpected_frags, opal_list_t);
}
static void mca_pml_csum_comm_proc_destruct(mca_pml_csum_comm_proc_t* proc)
{
OBJ_DESTRUCT(&proc->frags_cant_match);
OBJ_DESTRUCT(&proc->specific_receives);
OBJ_DESTRUCT(&proc->unexpected_frags);
}
static OBJ_CLASS_INSTANCE(
mca_pml_csum_comm_proc_t,
opal_object_t,
mca_pml_csum_comm_proc_construct,
mca_pml_csum_comm_proc_destruct);
static void mca_pml_csum_comm_construct(mca_pml_csum_comm_t* comm)
{
OBJ_CONSTRUCT(&comm->wild_receives, opal_list_t);
OBJ_CONSTRUCT(&comm->matching_lock, opal_mutex_t);
comm->recv_sequence = 0;
comm->procs = NULL;
comm->num_procs = 0;
}
static void mca_pml_csum_comm_destruct(mca_pml_csum_comm_t* comm)
{
size_t i;
for(i=0; i<comm->num_procs; i++)
OBJ_DESTRUCT((&comm->procs[i]));
if(NULL != comm->procs)
free(comm->procs);
OBJ_DESTRUCT(&comm->wild_receives);
OBJ_DESTRUCT(&comm->matching_lock);
}
OBJ_CLASS_INSTANCE(
mca_pml_csum_comm_t,
opal_object_t,
mca_pml_csum_comm_construct,
mca_pml_csum_comm_destruct);
int mca_pml_csum_comm_init_size(mca_pml_csum_comm_t* comm, size_t size)
{
size_t i;
/* send message sequence-number support - sender side */
comm->procs = (mca_pml_csum_comm_proc_t*)malloc(sizeof(mca_pml_csum_comm_proc_t)*size);
if(NULL == comm->procs) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(i=0; i<size; i++) {
OBJ_CONSTRUCT(comm->procs+i, mca_pml_csum_comm_proc_t);
}
comm->num_procs = size;
return OMPI_SUCCESS;
}

View file

@@ -1,79 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_CSUM_COMM_H
#define MCA_PML_CSUM_COMM_H
#include "opal/threads/mutex.h"
#include "opal/class/opal_list.h"
#include "ompi/proc/proc.h"
BEGIN_C_DECLS
struct mca_pml_csum_comm_proc_t {
opal_object_t super;
uint16_t expected_sequence; /**< send message sequence number - receiver side */
struct ompi_proc_t* ompi_proc;
#if OPAL_ENABLE_MULTI_THREADS
volatile int32_t send_sequence; /**< send side sequence number */
#else
int32_t send_sequence; /**< send side sequence number */
#endif
opal_list_t frags_cant_match; /**< out-of-order fragment queues */
opal_list_t specific_receives; /**< queues of unmatched specific receives */
opal_list_t unexpected_frags; /**< unexpected fragment queues */
};
typedef struct mca_pml_csum_comm_proc_t mca_pml_csum_comm_proc_t;
/**
* Cached on ompi_communicator_t to hold queues/state
* used by the PML<->PTL interface for matching logic.
*/
struct mca_pml_comm_t {
opal_object_t super;
#if OPAL_ENABLE_MULTI_THREADS
volatile uint32_t recv_sequence; /**< recv request sequence number - receiver side */
#else
uint32_t recv_sequence; /**< recv request sequence number - receiver side */
#endif
opal_mutex_t matching_lock; /**< matching lock */
opal_list_t wild_receives; /**< queue of unmatched wild (source process not specified) receives */
mca_pml_csum_comm_proc_t* procs;
size_t num_procs;
};
typedef struct mca_pml_comm_t mca_pml_csum_comm_t;
OBJ_CLASS_DECLARATION(mca_pml_csum_comm_t);
/**
* Initialize an instance of mca_pml_csum_comm_t based on the communicator size.
*
* @param comm Instance of mca_pml_csum_comm_t
* @param size Size of communicator
* @return OMPI_SUCCESS or error status on failure.
*/
extern int mca_pml_csum_comm_init_size(mca_pml_csum_comm_t* comm, size_t size);
END_C_DECLS
#endif

View file

@@ -1,254 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/event/event.h"
#include "mpi.h"
#include "ompi/runtime/params.h"
#include "ompi/mca/pml/pml.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/pml/base/pml_base_bsend.h"
#include "pml_csum.h"
#include "pml_csum_hdr.h"
#include "pml_csum_sendreq.h"
#include "pml_csum_recvreq.h"
#include "pml_csum_rdmafrag.h"
#include "pml_csum_recvfrag.h"
#include "ompi/mca/bml/base/base.h"
#include "pml_csum_component.h"
#include "ompi/mca/allocator/base/base.h"
OBJ_CLASS_INSTANCE( mca_pml_csum_pckt_pending_t,
ompi_free_list_item_t,
NULL,
NULL );
static int mca_pml_csum_component_open(void);
static int mca_pml_csum_component_close(void);
static mca_pml_base_module_t*
mca_pml_csum_component_init( int* priority, bool enable_progress_threads,
bool enable_mpi_threads );
static int mca_pml_csum_component_fini(void);
int mca_pml_csum_output = 0;
mca_pml_base_component_2_0_0_t mca_pml_csum_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
MCA_PML_BASE_VERSION_2_0_0,
"csum", /* MCA component name */
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_pml_csum_component_open, /* component open */
mca_pml_csum_component_close /* component close */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
mca_pml_csum_component_init, /* component init */
mca_pml_csum_component_fini /* component finalize */
};
void *mca_pml_csum_seg_alloc( struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration);
void mca_pml_csum_seg_free( struct mca_mpool_base_module_t* mpool,
void* segment );
static inline int mca_pml_csum_param_register_int(
const char* param_name,
int default_value)
{
int param_value = default_value;
(void) mca_base_param_reg_int (&mca_pml_csum_component.pmlm_version, param_name,
NULL, false, false, default_value, &param_value);
return param_value;
}
static int mca_pml_csum_component_open(void)
{
int value;
mca_allocator_base_component_t* allocator_component;
value = mca_pml_csum_param_register_int("verbose", 0);
mca_pml_csum_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_pml_csum_output, value);
mca_pml_csum.free_list_num =
mca_pml_csum_param_register_int("free_list_num", 4);
mca_pml_csum.free_list_max =
mca_pml_csum_param_register_int("free_list_max", -1);
mca_pml_csum.free_list_inc =
mca_pml_csum_param_register_int("free_list_inc", 64);
mca_pml_csum.priority =
mca_pml_csum_param_register_int("priority", 0);
mca_pml_csum.send_pipeline_depth =
mca_pml_csum_param_register_int("send_pipeline_depth", 3);
mca_pml_csum.recv_pipeline_depth =
mca_pml_csum_param_register_int("recv_pipeline_depth", 4);
mca_pml_csum.rdma_put_retries_limit =
mca_pml_csum_param_register_int("rdma_put_retries_limit", 5);
mca_pml_csum.max_rdma_per_request =
mca_pml_csum_param_register_int("max_rdma_per_request", 4);
mca_pml_csum.max_send_per_range =
mca_pml_csum_param_register_int("max_send_per_range", 4);
mca_pml_csum.unexpected_limit =
mca_pml_csum_param_register_int("unexpected_limit", 128);
mca_base_param_reg_string(&mca_pml_csum_component.pmlm_version,
"allocator",
"Name of allocator component for unexpected messages",
false, false,
"bucket",
&mca_pml_csum.allocator_name);
allocator_component = mca_allocator_component_lookup( mca_pml_csum.allocator_name );
if(NULL == allocator_component) {
opal_output(0, "mca_pml_csum_component_open: can't find allocator: %s\n", mca_pml_csum.allocator_name);
return OMPI_ERROR;
}
mca_pml_csum.allocator = allocator_component->allocator_init(true,
mca_pml_csum_seg_alloc,
mca_pml_csum_seg_free, NULL);
if(NULL == mca_pml_csum.allocator) {
opal_output(0, "mca_pml_csum_component_open: unable to initialize allocator\n");
return OMPI_ERROR;
}
mca_pml_csum.enabled = false;
return mca_bml_base_open();
}
static int mca_pml_csum_component_close(void)
{
int rc;
if (OMPI_SUCCESS != (rc = mca_bml_base_close())) {
return rc;
}
if (NULL != mca_pml_csum.allocator_name) {
free(mca_pml_csum.allocator_name);
}
opal_output_close(mca_pml_csum_output);
return OMPI_SUCCESS;
}
static mca_pml_base_module_t*
mca_pml_csum_component_init( int* priority,
bool enable_progress_threads,
bool enable_mpi_threads )
{
opal_output_verbose( 10, mca_pml_csum_output,
"in csum, my priority is %d\n", mca_pml_csum.priority);
if((*priority) > mca_pml_csum.priority) {
*priority = mca_pml_csum.priority;
return NULL;
}
*priority = mca_pml_csum.priority;
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads,
enable_mpi_threads)) {
return NULL;
}
/* Set this here (vs in component_open()) because
ompi_mpi_leave_pinned* may have been set after MCA params were
read (e.g., by the openib btl) */
mca_pml_csum.leave_pinned = (1 == ompi_mpi_leave_pinned);
mca_pml_csum.leave_pinned_pipeline = (int) ompi_mpi_leave_pinned_pipeline;
return &mca_pml_csum.super;
}
int mca_pml_csum_component_fini(void)
{
int rc;
/* Shutdown BML */
if(OMPI_SUCCESS != (rc = mca_bml.bml_finalize()))
return rc;
if(!mca_pml_csum.enabled)
return OMPI_SUCCESS; /* never selected; return success */
mca_pml_csum.enabled = false; /* not anymore */
OBJ_DESTRUCT(&mca_pml_csum.rdma_pending);
OBJ_DESTRUCT(&mca_pml_csum.pckt_pending);
OBJ_DESTRUCT(&mca_pml_csum.recv_pending);
OBJ_DESTRUCT(&mca_pml_csum.send_pending);
OBJ_DESTRUCT(&mca_pml_csum.non_existing_communicator_pending);
OBJ_DESTRUCT(&mca_pml_csum.buffers);
OBJ_DESTRUCT(&mca_pml_csum.pending_pckts);
OBJ_DESTRUCT(&mca_pml_csum.recv_frags);
OBJ_DESTRUCT(&mca_pml_csum.rdma_frags);
OBJ_DESTRUCT(&mca_pml_csum.lock);
if(OMPI_SUCCESS != (rc = mca_pml_csum.allocator->alc_finalize(mca_pml_csum.allocator))) {
return rc;
}
#if 0
if (mca_pml_base_send_requests.fl_num_allocated !=
mca_pml_base_send_requests.super.opal_list_length) {
opal_output(0, "csum send requests: %d allocated %d returned\n",
mca_pml_base_send_requests.fl_num_allocated,
mca_pml_base_send_requests.super.opal_list_length);
}
if (mca_pml_base_recv_requests.fl_num_allocated !=
mca_pml_base_recv_requests.super.opal_list_length) {
opal_output(0, "csum recv requests: %d allocated %d returned\n",
mca_pml_base_recv_requests.fl_num_allocated,
mca_pml_base_recv_requests.super.opal_list_length);
}
#endif
return OMPI_SUCCESS;
}
void *mca_pml_csum_seg_alloc( struct mca_mpool_base_module_t* mpool,
size_t* size,
mca_mpool_base_registration_t** registration) {
return malloc(*size);
}
void mca_pml_csum_seg_free( struct mca_mpool_base_module_t* mpool,
void* segment ) {
free(segment);
}

View file

@@ -1,33 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_CSUM_COMPONENT_H
#define MCA_PML_CSUM_COMPONENT_H
BEGIN_C_DECLS
/*
* PML module functions.
*/
OMPI_MODULE_DECLSPEC extern mca_pml_base_component_2_0_0_t mca_pml_csum_component;
END_C_DECLS
#endif

View file

@@ -1,25 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "pml_csum_endpoint.h"

View file

@@ -1,29 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_CSUM_ENDPOINT_H
#define MCA_PML_CSUM_ENDPOINT_H
BEGIN_C_DECLS
END_C_DECLS
#endif

View file

@@ -1,393 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_CSUM_HEADER_H
#define MCA_PML_CSUM_HEADER_H
#include "ompi_config.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#include "opal/types.h"
#include "opal/util/arch.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/proc/proc.h"
#define MCA_PML_CSUM_HDR_TYPE_MATCH (MCA_BTL_TAG_PML + 1)
#define MCA_PML_CSUM_HDR_TYPE_RNDV (MCA_BTL_TAG_PML + 2)
#define MCA_PML_CSUM_HDR_TYPE_RGET (MCA_BTL_TAG_PML + 3)
#define MCA_PML_CSUM_HDR_TYPE_ACK (MCA_BTL_TAG_PML + 4)
#define MCA_PML_CSUM_HDR_TYPE_NACK (MCA_BTL_TAG_PML + 5)
#define MCA_PML_CSUM_HDR_TYPE_FRAG (MCA_BTL_TAG_PML + 6)
#define MCA_PML_CSUM_HDR_TYPE_GET (MCA_BTL_TAG_PML + 7)
#define MCA_PML_CSUM_HDR_TYPE_PUT (MCA_BTL_TAG_PML + 8)
#define MCA_PML_CSUM_HDR_TYPE_FIN (MCA_BTL_TAG_PML + 9)
#define MCA_PML_CSUM_HDR_FLAGS_ACK 1 /* is an ack required */
#define MCA_PML_CSUM_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
#define MCA_PML_CSUM_HDR_FLAGS_PIN 4 /* is user buffer pinned */
#define MCA_PML_CSUM_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */
#define MCA_PML_CSUM_HDR_FLAGS_NORDMA 16 /* rest will be sent by copy-in/copy-out */
/**
* Common hdr attributes - must be first element in each hdr type
*/
struct mca_pml_csum_common_hdr_t {
uint8_t hdr_type; /**< type of envelope */
uint8_t hdr_flags; /**< flags indicating how fragment should be processed */
uint16_t hdr_csum; /**< checksum over header */
};
typedef struct mca_pml_csum_common_hdr_t mca_pml_csum_common_hdr_t;
#define MCA_PML_CSUM_COMMON_HDR_NTOH(h) (h).hdr_csum = ntohs((h).hdr_csum);
#define MCA_PML_CSUM_COMMON_HDR_HTON(h) (h).hdr_csum = htons((h).hdr_csum);
/**
* Header definition for the first fragment, contains the
* attributes required to match the corresponding posted receive.
*/
struct mca_pml_csum_match_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
uint16_t hdr_ctx; /**< communicator index */
uint16_t hdr_seq; /**< message sequence number */
int32_t hdr_src; /**< source rank */
int32_t hdr_tag; /**< user tag */
uint32_t hdr_csum; /**< checksum over data */
};
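/* packed size of mca_pml_csum_match_hdr_t: 4 (common hdr) + 2 (ctx) +
   2 (seq) + 4 (src) + 4 (tag) + 4 (csum) = 20 bytes */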
#define OMPI_PML_CSUM_MATCH_HDR_LEN 20
typedef struct mca_pml_csum_match_hdr_t mca_pml_csum_match_hdr_t;
#define MCA_PML_CSUM_MATCH_HDR_NTOH(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
(h).hdr_ctx = ntohs((h).hdr_ctx); \
(h).hdr_src = ntohl((h).hdr_src); \
(h).hdr_tag = ntohl((h).hdr_tag); \
(h).hdr_seq = ntohs((h).hdr_seq); \
(h).hdr_csum = ntohl((h).hdr_csum); \
} while (0)
#define MCA_PML_CSUM_MATCH_HDR_HTON(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
(h).hdr_ctx = htons((h).hdr_ctx); \
(h).hdr_src = htonl((h).hdr_src); \
(h).hdr_tag = htonl((h).hdr_tag); \
(h).hdr_seq = htons((h).hdr_seq); \
(h).hdr_csum = htonl((h).hdr_csum); \
} while (0)
/**
* Header definition for the first fragment when an acknowledgment
* is required. This could be the first fragment of a large message
* or a short message that requires an ack (synchronous).
*/
struct mca_pml_csum_rendezvous_hdr_t {
mca_pml_csum_match_hdr_t hdr_match;
uint64_t hdr_msg_length; /**< message length */
ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */
};
typedef struct mca_pml_csum_rendezvous_hdr_t mca_pml_csum_rendezvous_hdr_t;
/* Note that hdr_src_req is not put in network byte order because it
is never processed by the receiver, other than being copied into
the ack header */
#define MCA_PML_CSUM_RNDV_HDR_NTOH(h) \
do { \
MCA_PML_CSUM_MATCH_HDR_NTOH((h).hdr_match); \
(h).hdr_msg_length = ntoh64((h).hdr_msg_length); \
} while (0)
#define MCA_PML_CSUM_RNDV_HDR_HTON(h) \
do { \
MCA_PML_CSUM_MATCH_HDR_HTON((h).hdr_match); \
(h).hdr_msg_length = hton64((h).hdr_msg_length); \
} while (0)
/**
* Header definition for a combined rdma rendezvous/get
*/
struct mca_pml_csum_rget_hdr_t {
mca_pml_csum_rendezvous_hdr_t hdr_rndv;
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[4];
#endif
ompi_ptr_t hdr_des; /**< source descriptor */
};
typedef struct mca_pml_csum_rget_hdr_t mca_pml_csum_rget_hdr_t;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
#define MCA_PML_CSUM_RGET_HDR_FILL(h) \
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
(h).hdr_padding[2] = 0; \
(h).hdr_padding[3] = 0; \
} while(0)
#else
#define MCA_PML_CSUM_RGET_HDR_FILL(h)
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
#define MCA_PML_CSUM_RGET_HDR_NTOH(h) \
do { \
MCA_PML_CSUM_RNDV_HDR_NTOH((h).hdr_rndv); \
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
} while (0)
#define MCA_PML_CSUM_RGET_HDR_HTON(h) \
do { \
MCA_PML_CSUM_RNDV_HDR_HTON((h).hdr_rndv); \
MCA_PML_CSUM_RGET_HDR_FILL(h); \
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
} while (0)
/**
* Header for subsequent fragments.
*/
struct mca_pml_csum_frag_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
uint32_t hdr_csum;
uint64_t hdr_frag_offset; /**< offset into message */
ompi_ptr_t hdr_src_req; /**< pointer to source request */
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
};
typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t;
#define MCA_PML_CSUM_FRAG_HDR_NTOH(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
(h).hdr_csum = ntohl((h).hdr_csum); \
(h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \
} while (0)
#define MCA_PML_CSUM_FRAG_HDR_HTON(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
(h).hdr_csum = htonl((h).hdr_csum); \
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
} while (0)
/**
 * Header used to acknowledge outstanding fragment(s).
*/
struct mca_pml_csum_ack_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[4];
#endif
ompi_ptr_t hdr_src_req; /**< source request */
ompi_ptr_t hdr_dst_req; /**< matched receive request */
uint64_t hdr_send_offset; /**< starting point of copy in/out */
};
typedef struct mca_pml_csum_ack_hdr_t mca_pml_csum_ack_hdr_t;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
#define MCA_PML_CSUM_ACK_HDR_FILL(h) \
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
(h).hdr_padding[2] = 0; \
(h).hdr_padding[3] = 0; \
} while (0)
#else
#define MCA_PML_CSUM_ACK_HDR_FILL(h)
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
/* Note that the request headers are not put in NBO because the
   src_req is already in the receiver's byte order and the dst_req is not
used by the receiver for anything other than backpointers in return
headers */
#define MCA_PML_CSUM_ACK_HDR_NTOH(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
(h).hdr_send_offset = ntoh64((h).hdr_send_offset); \
} while (0)
#define MCA_PML_CSUM_ACK_HDR_HTON(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
MCA_PML_CSUM_ACK_HDR_FILL(h); \
(h).hdr_send_offset = hton64((h).hdr_send_offset); \
} while (0)
/**
* Header used to initiate an RDMA operation.
*/
struct mca_pml_csum_rdma_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
ompi_ptr_t hdr_req; /**< destination request */
ompi_ptr_t hdr_des; /**< source descriptor */
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
};
typedef struct mca_pml_csum_rdma_hdr_t mca_pml_csum_rdma_hdr_t;
#define MCA_PML_CSUM_RDMA_HDR_NTOH(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
(h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \
} while (0)
#define MCA_PML_CSUM_RDMA_HDR_HTON(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
(h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \
} while (0)
/**
* Header used to complete an RDMA operation.
*/
struct mca_pml_csum_fin_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
uint32_t hdr_csum;
ompi_ptr_t hdr_des; /**< completed descriptor */
uint32_t hdr_fail; /**< RDMA operation failed */
};
typedef struct mca_pml_csum_fin_hdr_t mca_pml_csum_fin_hdr_t;
#define MCA_PML_CSUM_FIN_HDR_NTOH(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_NTOH((h).hdr_common); \
(h).hdr_csum = ntohl((h).hdr_csum); \
(h).hdr_fail = ntohl((h).hdr_fail); \
} while (0)
#define MCA_PML_CSUM_FIN_HDR_HTON(h) \
do { \
MCA_PML_CSUM_COMMON_HDR_HTON((h).hdr_common); \
(h).hdr_csum = htonl((h).hdr_csum); \
(h).hdr_fail = htonl((h).hdr_fail); \
} while (0)
/**
* Union of defined hdr types.
*/
union mca_pml_csum_hdr_t {
mca_pml_csum_common_hdr_t hdr_common;
mca_pml_csum_match_hdr_t hdr_match;
mca_pml_csum_rendezvous_hdr_t hdr_rndv;
mca_pml_csum_rget_hdr_t hdr_rget;
mca_pml_csum_frag_hdr_t hdr_frag;
mca_pml_csum_ack_hdr_t hdr_ack;
mca_pml_csum_rdma_hdr_t hdr_rdma;
mca_pml_csum_fin_hdr_t hdr_fin;
};
typedef union mca_pml_csum_hdr_t mca_pml_csum_hdr_t;
#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
static inline __opal_attribute_always_inline__ void
csum_hdr_ntoh(mca_pml_csum_hdr_t *hdr, const uint8_t hdr_type)
{
if(!(hdr->hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NBO))
return;
switch(hdr_type) {
case MCA_PML_CSUM_HDR_TYPE_MATCH:
MCA_PML_CSUM_MATCH_HDR_NTOH(hdr->hdr_match);
break;
case MCA_PML_CSUM_HDR_TYPE_RNDV:
MCA_PML_CSUM_RNDV_HDR_NTOH(hdr->hdr_rndv);
break;
case MCA_PML_CSUM_HDR_TYPE_RGET:
MCA_PML_CSUM_RGET_HDR_NTOH(hdr->hdr_rget);
break;
case MCA_PML_CSUM_HDR_TYPE_ACK:
MCA_PML_CSUM_ACK_HDR_NTOH(hdr->hdr_ack);
break;
case MCA_PML_CSUM_HDR_TYPE_FRAG:
MCA_PML_CSUM_FRAG_HDR_NTOH(hdr->hdr_frag);
break;
case MCA_PML_CSUM_HDR_TYPE_PUT:
MCA_PML_CSUM_RDMA_HDR_NTOH(hdr->hdr_rdma);
break;
case MCA_PML_CSUM_HDR_TYPE_FIN:
MCA_PML_CSUM_FIN_HDR_NTOH(hdr->hdr_fin);
break;
default:
assert(0);
break;
}
}
#else
#define csum_hdr_ntoh(h, t) do{}while(0)
#endif
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
#define csum_hdr_hton(h, t, p) \
csum_hdr_hton_intr((mca_pml_csum_hdr_t*)h, t, p)
static inline __opal_attribute_always_inline__ void
csum_hdr_hton_intr(mca_pml_csum_hdr_t *hdr, const uint8_t hdr_type,
const ompi_proc_t *proc)
{
#ifdef WORDS_BIGENDIAN
hdr->hdr_common.hdr_flags |= MCA_PML_CSUM_HDR_FLAGS_NBO;
#else
if(!(proc->proc_arch & OPAL_ARCH_ISBIGENDIAN))
return;
hdr->hdr_common.hdr_flags |= MCA_PML_CSUM_HDR_FLAGS_NBO;
switch(hdr_type) {
case MCA_PML_CSUM_HDR_TYPE_MATCH:
MCA_PML_CSUM_MATCH_HDR_HTON(hdr->hdr_match);
break;
case MCA_PML_CSUM_HDR_TYPE_RNDV:
MCA_PML_CSUM_RNDV_HDR_HTON(hdr->hdr_rndv);
break;
case MCA_PML_CSUM_HDR_TYPE_RGET:
MCA_PML_CSUM_RGET_HDR_HTON(hdr->hdr_rget);
break;
case MCA_PML_CSUM_HDR_TYPE_ACK:
MCA_PML_CSUM_ACK_HDR_HTON(hdr->hdr_ack);
break;
case MCA_PML_CSUM_HDR_TYPE_FRAG:
MCA_PML_CSUM_FRAG_HDR_HTON(hdr->hdr_frag);
break;
case MCA_PML_CSUM_HDR_TYPE_PUT:
MCA_PML_CSUM_RDMA_HDR_HTON(hdr->hdr_rdma);
break;
case MCA_PML_CSUM_HDR_TYPE_FIN:
MCA_PML_CSUM_FIN_HDR_HTON(hdr->hdr_fin);
break;
default:
assert(0);
break;
}
#endif
}
#else
#define csum_hdr_hton(h, t, p) do{}while(0)
#endif
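/*
 * A minimal usage sketch (illustrative only; `hdr' and `dst_proc' are
 * hypothetical locals, not part of this header): the sender builds the
 * header in host byte order and converts it only when the peer is
 * big-endian; the receiver undoes the conversion when the NBO flag is set.
 */
#if 0
mca_pml_csum_hdr_t *hdr = &outgoing_hdr;
hdr->hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_MATCH;
csum_hdr_hton(hdr, MCA_PML_CSUM_HDR_TYPE_MATCH, dst_proc);  /* before posting the send */
/* ... on the receiving side ... */
csum_hdr_ntoh(hdr, hdr->hdr_common.hdr_type);               /* before interpreting fields */
#endif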
#endif


@ -1,98 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/request/request.h"
#include "pml_csum_recvreq.h"
int mca_pml_csum_iprobe(int src,
int tag,
struct ompi_communicator_t *comm,
int *matched, ompi_status_public_t * status)
{
int rc = OMPI_SUCCESS;
mca_pml_csum_recv_request_t recvreq;
OBJ_CONSTRUCT( &recvreq, mca_pml_csum_recv_request_t );
recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML;
recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_IPROBE;
MCA_PML_CSUM_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true);
MCA_PML_CSUM_RECV_REQUEST_START(&recvreq);
if( recvreq.req_recv.req_base.req_ompi.req_complete == true ) {
if( NULL != status ) {
*status = recvreq.req_recv.req_base.req_ompi.req_status;
}
*matched = 1;
} else {
*matched = 0;
opal_progress();
}
MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );
return rc;
}
int mca_pml_csum_probe(int src,
int tag,
struct ompi_communicator_t *comm,
ompi_status_public_t * status)
{
mca_pml_csum_recv_request_t recvreq;
OBJ_CONSTRUCT( &recvreq, mca_pml_csum_recv_request_t );
recvreq.req_recv.req_base.req_ompi.req_type = OMPI_REQUEST_PML;
recvreq.req_recv.req_base.req_type = MCA_PML_REQUEST_PROBE;
MCA_PML_CSUM_RECV_REQUEST_INIT(&recvreq, NULL, 0, &ompi_mpi_char.dt, src, tag, comm, true);
MCA_PML_CSUM_RECV_REQUEST_START(&recvreq);
ompi_request_wait_completion(&recvreq.req_recv.req_base.req_ompi);
if (NULL != status) {
*status = recvreq.req_recv.req_base.req_ompi.req_status;
}
MCA_PML_BASE_RECV_REQUEST_FINI( &recvreq.req_recv );
return OMPI_SUCCESS;
}
int
mca_pml_csum_improbe(int dst,
int tag,
struct ompi_communicator_t* comm,
int *matched,
struct ompi_message_t **message,
ompi_status_public_t* status)
{
return OMPI_ERR_NOT_SUPPORTED;
}
int
mca_pml_csum_mprobe(int dst,
int tag,
struct ompi_communicator_t* comm,
struct ompi_message_t **message,
ompi_status_public_t* status)
{
return OMPI_ERR_NOT_SUPPORTED;
}


@ -1,135 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/request/request.h"
#include "pml_csum_recvreq.h"
#include "ompi/peruse/peruse-internal.h"
int mca_pml_csum_irecv_init(void *addr,
size_t count,
ompi_datatype_t * datatype,
int src,
int tag,
struct ompi_communicator_t *comm,
struct ompi_request_t **request)
{
int rc;
mca_pml_csum_recv_request_t *recvreq;
MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc);
if (NULL == recvreq)
return rc;
MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq,
addr,
count, datatype, src, tag, comm, true);
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
&((recvreq)->req_recv.req_base),
PERUSE_RECV);
*request = (ompi_request_t *) recvreq;
return OMPI_SUCCESS;
}
int mca_pml_csum_irecv(void *addr,
size_t count,
ompi_datatype_t * datatype,
int src,
int tag,
struct ompi_communicator_t *comm,
struct ompi_request_t **request)
{
int rc;
mca_pml_csum_recv_request_t *recvreq;
MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc);
if (NULL == recvreq)
return rc;
MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq,
addr,
count, datatype, src, tag, comm, false);
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
&((recvreq)->req_recv.req_base),
PERUSE_RECV);
MCA_PML_CSUM_RECV_REQUEST_START(recvreq);
*request = (ompi_request_t *) recvreq;
return OMPI_SUCCESS;
}
int mca_pml_csum_recv(void *addr,
size_t count,
ompi_datatype_t * datatype,
int src,
int tag,
struct ompi_communicator_t *comm,
ompi_status_public_t * status)
{
int rc;
mca_pml_csum_recv_request_t *recvreq;
MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc);
if (NULL == recvreq)
return rc;
MCA_PML_CSUM_RECV_REQUEST_INIT(recvreq,
addr,
count, datatype, src, tag, comm, false);
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
&((recvreq)->req_recv.req_base),
PERUSE_RECV);
MCA_PML_CSUM_RECV_REQUEST_START(recvreq);
ompi_request_wait_completion(&recvreq->req_recv.req_base.req_ompi);
if (NULL != status) { /* return status */
*status = recvreq->req_recv.req_base.req_ompi.req_status;
}
rc = recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR;
ompi_request_free( (ompi_request_t**)&recvreq );
return rc;
}
int
mca_pml_csum_imrecv(void *buf,
size_t count,
ompi_datatype_t *datatype,
struct ompi_message_t **message,
struct ompi_request_t **request)
{
return OMPI_ERR_NOT_SUPPORTED;
}
int
mca_pml_csum_mrecv(void *buf,
size_t count,
ompi_datatype_t *datatype,
struct ompi_message_t **message,
ompi_status_public_t* status)
{
return OMPI_ERR_NOT_SUPPORTED;
}


@ -1,130 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "pml_csum.h"
#include "pml_csum_sendreq.h"
#include "pml_csum_recvreq.h"
#include "ompi/peruse/peruse-internal.h"
int mca_pml_csum_isend_init(void *buf,
size_t count,
ompi_datatype_t * datatype,
int dst,
int tag,
mca_pml_base_send_mode_t sendmode,
ompi_communicator_t * comm,
ompi_request_t ** request)
{
int rc;
mca_pml_csum_send_request_t *sendreq = NULL;
MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
if (rc != OMPI_SUCCESS)
return rc;
MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq,
buf,
count,
datatype,
dst, tag,
comm, sendmode, true);
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
&(sendreq)->req_send.req_base,
PERUSE_SEND);
*request = (ompi_request_t *) sendreq;
return OMPI_SUCCESS;
}
int mca_pml_csum_isend(void *buf,
size_t count,
ompi_datatype_t * datatype,
int dst,
int tag,
mca_pml_base_send_mode_t sendmode,
ompi_communicator_t * comm,
ompi_request_t ** request)
{
int rc;
mca_pml_csum_send_request_t *sendreq = NULL;
MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
if (rc != OMPI_SUCCESS)
return rc;
MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq,
buf,
count,
datatype,
dst, tag,
comm, sendmode, false);
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
&(sendreq)->req_send.req_base,
PERUSE_SEND);
MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc);
*request = (ompi_request_t *) sendreq;
return rc;
}
int mca_pml_csum_send(void *buf,
size_t count,
ompi_datatype_t * datatype,
int dst,
int tag,
mca_pml_base_send_mode_t sendmode,
ompi_communicator_t * comm)
{
int rc;
mca_pml_csum_send_request_t *sendreq;
MCA_PML_CSUM_SEND_REQUEST_ALLOC(comm, dst, sendreq, rc);
if (rc != OMPI_SUCCESS)
return rc;
MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq,
buf,
count,
datatype,
dst, tag,
comm, sendmode, false);
PERUSE_TRACE_COMM_EVENT (PERUSE_COMM_REQ_ACTIVATE,
&(sendreq)->req_send.req_base,
PERUSE_SEND);
MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc);
if (rc != OMPI_SUCCESS) {
MCA_PML_CSUM_SEND_REQUEST_RETURN( sendreq );
return rc;
}
ompi_request_wait_completion(&sendreq->req_send.req_base.req_ompi);
rc = sendreq->req_send.req_base.req_ompi.req_status.MPI_ERROR;
ompi_request_free( (ompi_request_t**)&sendreq );
return rc;
}


@ -1,77 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2008 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "pml_csum.h"
#include "pml_csum_sendreq.h"
#include "ompi/mca/bml/base/base.h"
int mca_pml_csum_progress(void)
{
int i, queue_length = opal_list_get_size(&mca_pml_csum.send_pending);
int j, completed_requests = 0;
    bool send_succeeded;
if( OPAL_LIKELY(0 == queue_length) )
return 0;
for( i = 0; i < queue_length; i++ ) {
mca_pml_csum_send_pending_t pending_type = MCA_PML_CSUM_SEND_PENDING_NONE;
mca_pml_csum_send_request_t* sendreq;
mca_bml_base_endpoint_t* endpoint;
sendreq = get_request_from_send_pending(&pending_type);
if(OPAL_UNLIKELY(NULL == sendreq))
break;
switch(pending_type) {
case MCA_PML_CSUM_SEND_PENDING_NONE:
assert(0);
return 0;
case MCA_PML_CSUM_SEND_PENDING_SCHEDULE:
if( mca_pml_csum_send_request_schedule_exclusive(sendreq) ==
OMPI_ERR_OUT_OF_RESOURCE ) {
return 0;
}
completed_requests++;
break;
case MCA_PML_CSUM_SEND_PENDING_START:
endpoint = sendreq->req_endpoint;
            send_succeeded = false;
for(j = 0; j < (int)mca_bml_base_btl_array_get_size(&endpoint->btl_eager); j++) {
mca_bml_base_btl_t* bml_btl;
int rc;
/* select a btl */
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
rc = mca_pml_csum_send_request_start_btl(sendreq, bml_btl);
if( OPAL_LIKELY(OMPI_SUCCESS == rc) ) {
                    send_succeeded = true;
completed_requests++;
break;
}
}
            if( false == send_succeeded ) {
add_request_to_send_pending(sendreq, MCA_PML_CSUM_SEND_PENDING_START, true);
}
}
}
return completed_requests;
}


@ -1,118 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/mpool/mpool.h"
#include "pml_csum.h"
#include "pml_csum_rdma.h"
/* Use this dummy registration (instead of NULL) when a BTL needs no
 * registration. This helps other code distinguish the case where memory is
 * not registered from the case where registration is not needed. */
static mca_mpool_base_registration_t pml_csum_dummy_reg;
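/*
 * Sketch (illustrative only; the helper below is hypothetical and not part
 * of this file): how the dummy registration lets callers tell the three
 * states apart.
 */
#if 0
static inline int pml_csum_reg_state(mca_mpool_base_registration_t *reg)
{
    if (NULL == reg)                 return 0;  /* memory is not registered */
    if (&pml_csum_dummy_reg == reg)  return 1;  /* BTL needs no registration */
    return 2;                                   /* a real registration exists */
}
#endif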
/*
* Check to see if memory is registered or can be registered. Build a
* set of registrations on the request.
*/
size_t mca_pml_csum_rdma_btls(
mca_bml_base_endpoint_t* bml_endpoint,
unsigned char* base,
size_t size,
mca_pml_csum_com_btl_t* rdma_btls)
{
int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
double weight_total = 0;
int num_btls_used = 0, n;
/* shortcut when there are no rdma capable btls */
if(num_btls == 0) {
return 0;
}
/* check to see if memory is registered */
for(n = 0; n < num_btls && num_btls_used < mca_pml_csum.max_rdma_per_request;
n++) {
mca_bml_base_btl_t* bml_btl =
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
(bml_endpoint->btl_rdma_index + n) % num_btls);
mca_mpool_base_registration_t* reg = &pml_csum_dummy_reg;
mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;
if( NULL != btl_mpool ) {
if(!mca_pml_csum.leave_pinned) {
/* look through existing registrations */
btl_mpool->mpool_find(btl_mpool, base, size, &reg);
} else {
/* register the memory */
btl_mpool->mpool_register(btl_mpool, base, size, 0, &reg);
}
if(NULL == reg)
continue;
}
rdma_btls[num_btls_used].bml_btl = bml_btl;
rdma_btls[num_btls_used].btl_reg = reg;
weight_total += bml_btl->btl_weight;
num_btls_used++;
}
    /* if we don't use leave_pinned and the BTLs that already have this memory
     * registered amount to less than half of the available bandwidth, fall
     * back to the pipeline protocol */
if(0 == num_btls_used || (!mca_pml_csum.leave_pinned && weight_total < 0.5))
return 0;
mca_pml_csum_calc_weighted_length(rdma_btls, num_btls_used, size,
weight_total);
bml_endpoint->btl_rdma_index = (bml_endpoint->btl_rdma_index + 1) % num_btls;
return num_btls_used;
}
size_t mca_pml_csum_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
size_t size,
mca_pml_csum_com_btl_t* rdma_btls )
{
int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
double weight_total = 0;
for(i = 0; i < num_btls && i < mca_pml_csum.max_rdma_per_request; i++) {
rdma_btls[i].bml_btl =
mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool)
rdma_btls[i].btl_reg = NULL;
else
rdma_btls[i].btl_reg = &pml_csum_dummy_reg;
weight_total += rdma_btls[i].bml_btl->btl_weight;
}
mca_pml_csum_calc_weighted_length(rdma_btls, i, size, weight_total);
return i;
}
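/*
 * Sketch (illustrative only) of the proportional split that
 * mca_pml_csum_calc_weighted_length() is expected to perform, assuming the
 * mca_pml_csum_com_btl_t `length' field carries each BTL's share: every BTL
 * is assigned a part of `size' proportional to its weight, with the
 * remainder going to the last BTL so the parts sum to `size' exactly.
 */
#if 0
static void calc_weighted_length_sketch(mca_pml_csum_com_btl_t* btls, int num,
                                        size_t size, double weight_total)
{
    size_t assigned = 0;
    int i;
    for (i = 0; i < num - 1; i++) {
        btls[i].length = (size_t)(size * (btls[i].bml_btl->btl_weight / weight_total));
        assigned += btls[i].length;
    }
    btls[num - 1].length = size - assigned;  /* remainder to the last BTL */
}
#endif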


@ -1,41 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_CSUM_RDMA_H
#define MCA_PML_CSUM_RDMA_H
struct mca_bml_base_endpoint_t;
/*
* Of the set of available btls that support RDMA,
* find those that already have registrations - or
* register if required (for leave_pinned option)
*/
size_t mca_pml_csum_rdma_btls(struct mca_bml_base_endpoint_t* endpoint,
unsigned char* base, size_t size, struct mca_pml_csum_com_btl_t* btls);
/* Choose the RDMA BTLs to use for sending a request via the pipeline protocol.
 * Calculate the number of bytes to send through each BTL according to its
 * available bandwidth */
size_t mca_pml_csum_rdma_pipeline_btls(struct mca_bml_base_endpoint_t* endpoint,
size_t size, mca_pml_csum_com_btl_t* rdma_btls);
#endif


@ -1,29 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "pml_csum.h"
#include "pml_csum_rdmafrag.h"
OBJ_CLASS_INSTANCE(
mca_pml_csum_rdma_frag_t,
ompi_free_list_item_t,
NULL,
NULL);


@ -1,71 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_CSUM_RDMAFRAG_H
#define MCA_PML_CSUM_RDMAFRAG_H
#include "ompi/mca/btl/btl.h"
#include "pml_csum_hdr.h"
BEGIN_C_DECLS
typedef enum {
MCA_PML_CSUM_RDMA_PUT,
MCA_PML_CSUM_RDMA_GET
} mca_pml_csum_rdma_state_t;
struct mca_pml_csum_rdma_frag_t {
ompi_free_list_item_t super;
mca_bml_base_btl_t* rdma_bml;
mca_pml_csum_hdr_t rdma_hdr;
mca_pml_csum_rdma_state_t rdma_state;
size_t rdma_length;
uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS];
void *rdma_req;
struct mca_bml_base_endpoint_t* rdma_ep;
opal_convertor_t convertor;
mca_mpool_base_registration_t* reg;
uint32_t retries;
};
typedef struct mca_pml_csum_rdma_frag_t mca_pml_csum_rdma_frag_t;
OBJ_CLASS_DECLARATION(mca_pml_csum_rdma_frag_t);
#define MCA_PML_CSUM_RDMA_FRAG_ALLOC(frag,rc) \
do { \
ompi_free_list_item_t* item; \
OMPI_FREE_LIST_WAIT(&mca_pml_csum.rdma_frags, item, rc); \
frag = (mca_pml_csum_rdma_frag_t*)item; \
} while(0)
#define MCA_PML_CSUM_RDMA_FRAG_RETURN(frag) \
do { \
/* return fragment */ \
OMPI_FREE_LIST_RETURN(&mca_pml_csum.rdma_frags, \
(ompi_free_list_item_t*)frag); \
} while(0)
END_C_DECLS
#endif


@ -1,841 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/util/crc.h"
#include "opal/threads/mutex.h"
#include "opal/prefetch.h"
#include "opal/util/output.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/peruse/peruse-internal.h"
#include "ompi/memchecker.h"
#include "pml_csum.h"
#include "pml_csum_comm.h"
#include "pml_csum_recvfrag.h"
#include "pml_csum_recvreq.h"
#include "pml_csum_sendreq.h"
#include "pml_csum_hdr.h"
OBJ_CLASS_INSTANCE( mca_pml_csum_buffer_t,
ompi_free_list_item_t,
NULL,
NULL );
OBJ_CLASS_INSTANCE( mca_pml_csum_recv_frag_t,
opal_list_item_t,
NULL,
NULL );
/**
* Static functions.
*/
/**
* Dump data elements that caused a checksum violation
*/
static void dump_csum_error_data(mca_btl_base_segment_t* segments, size_t num_segments)
{
size_t i, j;
uint8_t *data;
printf("CHECKSUM ERROR DATA\n");
for (i = 0; i < num_segments; ++i) {
printf("Segment %lu", (unsigned long)i);
data = (uint8_t*)segments[i].seg_addr.pval;
for (j=0; j < segments[i].seg_len; j++) {
if (0 == (j % 40)) {
printf("\n");
}
printf("%02x ", data[j]);
};
}
printf("\nEND CHECKSUM ERROR DATA\n\n");
}
/**
 * Append an unexpected descriptor to a queue. This function will allocate and
 * initialize the fragment (if necessary) and then add it to the specified
* queue. The allocated fragment is not returned to the caller.
*/
static void
append_frag_to_list(opal_list_t *queue, mca_btl_base_module_t *btl,
mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
size_t num_segments, mca_pml_csum_recv_frag_t* frag)
{
int rc;
if(NULL == frag) {
MCA_PML_CSUM_RECV_FRAG_ALLOC(frag, rc);
MCA_PML_CSUM_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl);
}
opal_list_append(queue, (opal_list_item_t*)frag);
}
/**
* Match incoming recv_frags against posted receives.
 * Supports out-of-order delivery.
*
* @param frag_header (IN) Header of received recv_frag.
* @param frag_desc (IN) Received recv_frag descriptor.
 * @param match_made (OUT) Flag indicating whether a match was made.
* @param additional_matches (OUT) List of additional matches
* @return OMPI_SUCCESS or error status on failure.
*/
static int mca_pml_csum_recv_frag_match( mca_btl_base_module_t *btl,
mca_pml_csum_match_hdr_t *hdr,
mca_btl_base_segment_t* segments,
size_t num_segments,
int type);
static mca_pml_csum_recv_request_t*
match_one(mca_btl_base_module_t *btl,
mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
size_t num_segments, ompi_communicator_t *comm_ptr,
mca_pml_csum_comm_proc_t *proc,
mca_pml_csum_recv_frag_t* frag);
void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata )
{
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_csum_match_hdr_t* hdr = (mca_pml_csum_match_hdr_t*)segments->seg_addr.pval;
ompi_communicator_t *comm_ptr;
mca_pml_csum_recv_request_t *match = NULL;
mca_pml_csum_comm_t *comm;
mca_pml_csum_comm_proc_t *proc;
size_t num_segments = des->des_dst_cnt;
size_t bytes_received = 0;
uint16_t csum_received, csum=0;
uint32_t csum_data;
assert(num_segments <= MCA_BTL_DES_MAX_SEGMENTS);
if( OPAL_UNLIKELY(segments->seg_len < OMPI_PML_CSUM_MATCH_HDR_LEN) ) {
return;
}
csum_hdr_ntoh(((mca_pml_csum_hdr_t*) hdr), MCA_PML_CSUM_HDR_TYPE_MATCH);
csum_received = hdr->hdr_common.hdr_csum;
hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
csum = opal_csum16(hdr, OMPI_PML_CSUM_MATCH_HDR_LEN);
hdr->hdr_common.hdr_csum = csum_received;
OPAL_OUTPUT_VERBOSE((5, mca_pml_base_output,
"%s:%s:%d common_hdr: %02x:%02x:%04x match_hdr: %04x:%04x:%08x:%08x:%08x",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__,
hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags, hdr->hdr_common.hdr_csum,
hdr->hdr_ctx, hdr->hdr_seq, hdr->hdr_src, hdr->hdr_tag, hdr->hdr_csum));
if (csum_received != csum) {
opal_output(0, "%s:%s:%d: Invalid \'match header\' - received csum:0x%04x != computed csum:0x%04x\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
dump_csum_error_data(segments, 1);
ompi_rte_abort(-1,NULL);
}
/* communicator pointer */
comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
if(OPAL_UNLIKELY(NULL == comm_ptr)) {
        /* This is a special case. A message for a not-yet-existing
         * communicator can arrive. Instead of doing a matching we
         * will temporarily add it to a pending queue in the PML.
         * Later on, when the communicator is completely instantiated,
         * this pending queue will be searched and all matching fragments
         * moved to the right communicator.
         */
append_frag_to_list( &mca_pml_csum.non_existing_communicator_pending,
btl, hdr, segments, num_segments, NULL );
return;
}
comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm;
/* source sequence number */
proc = &comm->procs[hdr->hdr_src];
    /* We generate the MSG_ARRIVED event as soon as the PML is aware
     * of a matching fragment arrival, independent of whether it is
     * received in the correct order or not. This allows the tools to
     * figure out when messages are not received in the correct
     * order (e.g. with multiple network interfaces).
     */
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
/* get next expected message sequence number - if threaded
* run, lock to make sure that if another thread is processing
* a frag from the same message a match is made only once.
     * Also, this prevents other posted receives (for a pair of
     * endpoints) from being processed, and potentially "losing"
* the fragment.
*/
OPAL_THREAD_LOCK(&comm->matching_lock);
/* get sequence number of next message that can be processed */
if(OPAL_UNLIKELY((((uint16_t) hdr->hdr_seq) != ((uint16_t) proc->expected_sequence)) ||
(opal_list_get_size(&proc->frags_cant_match) > 0 ))) {
goto slow_path;
}
/* This is the sequence number we were expecting, so we can try
* matching it to already posted receives.
*/
/* We're now expecting the next sequence number. */
proc->expected_sequence++;
/* We generate the SEARCH_POSTED_QUEUE only when the message is
* received in the correct sequence. Otherwise, we delay the event
* generation until we reach the correct sequence number.
*/
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, NULL);
/* The match is over. We generate the SEARCH_POSTED_Q_END here,
* before going into the mca_pml_csum_check_cantmatch_for_match so
* we can make a difference for the searching time for all
* messages.
*/
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
/* release matching lock before processing fragment */
OPAL_THREAD_UNLOCK(&comm->matching_lock);
if(OPAL_LIKELY(match)) {
bytes_received = segments->seg_len - OMPI_PML_CSUM_MATCH_HDR_LEN;
match->req_recv.req_bytes_packed = bytes_received;
MCA_PML_CSUM_RECV_REQUEST_MATCHED(match, hdr);
if(match->req_bytes_expected > 0) {
struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS];
uint32_t iov_count = 1;
/*
             * Make the user buffer accessible (defined) before unpacking.
*/
MEMCHECKER(
memchecker_call(&opal_memchecker_base_mem_defined,
match->req_recv.req_base.req_addr,
match->req_recv.req_base.req_count,
match->req_recv.req_base.req_datatype);
);
iov[0].iov_len = bytes_received;
iov[0].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments->seg_addr.pval +
OMPI_PML_CSUM_MATCH_HDR_LEN);
while (iov_count < num_segments) {
bytes_received += segments[iov_count].seg_len;
iov[iov_count].iov_len = segments[iov_count].seg_len;
iov[iov_count].iov_base = (IOVBASE_TYPE*)((unsigned char*)segments[iov_count].seg_addr.pval);
iov_count++;
}
opal_convertor_unpack( &match->req_recv.req_base.req_convertor,
iov,
&iov_count,
&bytes_received );
match->req_bytes_received = bytes_received;
/*
             * Unpacking finished; make the user buffer inaccessible again.
*/
MEMCHECKER(
memchecker_call(&opal_memchecker_base_mem_noaccess,
match->req_recv.req_base.req_addr,
match->req_recv.req_base.req_count,
match->req_recv.req_base.req_datatype);
);
}
if (bytes_received > 0) {
csum_data = match->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), hdr->hdr_csum, csum_received, (unsigned long)bytes_received));
if (csum_data != hdr->hdr_csum) {
opal_output(0, "%s:%s:%d: Invalid \'match data\' - received csum:0x%x != computed csum:0x%x\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, hdr->hdr_csum, csum_data);
dump_csum_error_data(segments, num_segments);
ompi_rte_abort(-1,NULL);
}
}
        /* no need to check whether the request is complete: we know it is */
        /* no rmb is needed either; that barrier only matters for the check */
recv_request_pml_complete(match);
}
return;
slow_path:
OPAL_THREAD_UNLOCK(&comm->matching_lock);
mca_pml_csum_recv_frag_match(btl, hdr, segments,
num_segments, MCA_PML_CSUM_HDR_TYPE_MATCH);
}
void mca_pml_csum_recv_frag_callback_rndv(mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata )
{
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
uint16_t csum_received, csum;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;
}
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_RNDV);
csum_received = hdr->hdr_common.hdr_csum;
hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
csum = opal_csum16(hdr, sizeof(mca_pml_csum_rendezvous_hdr_t));
hdr->hdr_common.hdr_csum = csum_received;
if (csum_received != csum) {
opal_output(0, "%s:%s:%d: Invalid \'rndv header\' - received csum:0x%04x != computed csum:0x%04x\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
dump_csum_error_data(segments, 1);
ompi_rte_abort(-1,NULL);
}
mca_pml_csum_recv_frag_match(btl, &hdr->hdr_match, segments,
des->des_dst_cnt, MCA_PML_CSUM_HDR_TYPE_RNDV);
return;
}
void mca_pml_csum_recv_frag_callback_rget(mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata )
{
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;
}
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_RGET);
mca_pml_csum_recv_frag_match(btl, &hdr->hdr_match, segments,
des->des_dst_cnt, MCA_PML_CSUM_HDR_TYPE_RGET);
return;
}
void mca_pml_csum_recv_frag_callback_ack(mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata )
{
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_pml_csum_send_request_t* sendreq;
uint16_t csum_received, csum;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;
}
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_ACK);
csum_received = hdr->hdr_common.hdr_csum;
hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
csum = opal_csum16(hdr, sizeof(mca_pml_csum_ack_hdr_t));
hdr->hdr_common.hdr_csum = csum_received;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'ACK\' with header csum:0x%04x\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum));
if (csum_received != csum) {
opal_output(0, "%s:%s:%d: Invalid \'ACK header\' - received csum:0x%04x != computed csum:0x%04x\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
dump_csum_error_data(segments, 1);
ompi_rte_abort(-1,NULL);
}
sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_ack.hdr_src_req.pval;
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
/* if the request should be delivered entirely by copy in/out
* then throttle sends */
if(hdr->hdr_common.hdr_flags & MCA_PML_CSUM_HDR_FLAGS_NORDMA)
sendreq->req_throttle_sends = true;
mca_pml_csum_send_request_copy_in_out(sendreq,
hdr->hdr_ack.hdr_send_offset,
sendreq->req_send.req_bytes_packed -
hdr->hdr_ack.hdr_send_offset);
if (sendreq->req_state != 0) {
/* Typical receipt of an ACK message causes req_state to be
* decremented. However, a send request that started as an
* RGET request can become a RNDV. For example, when the
* receiver determines that its receive buffer is not
* contiguous and therefore cannot support the RGET
* protocol. A send request that started with the RGET
* protocol has req_state == 0 and as such should not be
* decremented.
*/
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
}
if(send_request_pml_complete_check(sendreq) == false)
mca_pml_csum_send_request_schedule(sendreq);
return;
}
void mca_pml_csum_recv_frag_callback_frag(mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata ) {
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_pml_csum_recv_request_t* recvreq;
uint16_t csum_received, csum;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;
}
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FRAG);
csum_received = hdr->hdr_common.hdr_csum;
hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
csum = opal_csum16(hdr, sizeof(mca_pml_csum_frag_hdr_t));
hdr->hdr_common.hdr_csum = csum_received;
if(csum_received != csum) {
opal_output(0, "%s:%s:%d: Invalid \'frag header\' - received csum:0x%04x != computed csum:0x%04x\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
dump_csum_error_data(segments, 1);
ompi_rte_abort(-1,NULL);
}
recvreq = (mca_pml_csum_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
mca_pml_csum_recv_request_progress_frag(recvreq,btl,segments,des->des_dst_cnt);
return;
}
void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata ) {
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_pml_csum_send_request_t* sendreq;
uint16_t csum_received, csum;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;
}
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_PUT);
csum_received = hdr->hdr_common.hdr_csum;
hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
csum = opal_csum16(hdr, sizeof(mca_pml_csum_rdma_hdr_t));
hdr->hdr_common.hdr_csum = csum_received;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'PUT\' with header csum:0x%04x\n", OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), csum));
if(csum_received != csum) {
opal_output(0, "%s:%s:%d: Invalid \'PUT header\' - received csum:0x%04x != computed csum:0x%04x\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
dump_csum_error_data(segments, 1);
ompi_rte_abort(-1,NULL);
}
sendreq = (mca_pml_csum_send_request_t*)hdr->hdr_rdma.hdr_req.pval;
mca_pml_csum_send_request_put(sendreq,btl,&hdr->hdr_rdma);
return;
}
void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* des,
void* cbdata ) {
mca_btl_base_segment_t* segments = des->des_dst;
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_btl_base_descriptor_t* rdma;
uint16_t csum_received, csum;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;
}
csum_hdr_ntoh(hdr, MCA_PML_CSUM_HDR_TYPE_FIN);
csum_received = hdr->hdr_common.hdr_csum;
hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
csum = opal_csum16(hdr, sizeof(mca_pml_csum_fin_hdr_t));
hdr->hdr_common.hdr_csum = csum_received;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'FIN\' with header csum:0x%04x\n",OMPI_NAME_PRINT(OMPI_PROC_MY_NAME),csum));
if(csum_received != csum) {
opal_output(0, "%s:%s:%d: Invalid \'FIN header\' - received csum:0x%04x != computed csum:0x%04x\n",
OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), __FILE__, __LINE__, csum_received, csum);
dump_csum_error_data(segments, 1);
ompi_rte_abort(-1,NULL);
}
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
rdma->des_cbfunc(btl, NULL, rdma,
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);
return;
}
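/*
 * Sketch (illustrative only; this helper is hypothetical and does not exist
 * in the original file): the verify-header-checksum sequence repeated in the
 * callbacks above, factored into one place. `len' is the size of the
 * specific header type being checked.
 */
#if 0
static inline void csum_verify_hdr_or_abort(mca_pml_csum_hdr_t *hdr, size_t len,
                                            mca_btl_base_segment_t *segments)
{
    uint16_t received = hdr->hdr_common.hdr_csum, computed;
    hdr->hdr_common.hdr_csum = 0;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
    hdr->hdr_common.hdr_flags &= ~MCA_PML_CSUM_HDR_FLAGS_NBO;
#endif
    computed = opal_csum16(hdr, len);
    hdr->hdr_common.hdr_csum = received;
    if (received != computed) {
        dump_csum_error_data(segments, 1);
        ompi_rte_abort(-1, NULL);
    }
}
#endif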
#define PML_MAX_SEQ (~((mca_pml_sequence_t)0))
static inline mca_pml_csum_recv_request_t* get_posted_recv(opal_list_t *queue)
{
if(opal_list_get_size(queue) == 0)
return NULL;
return (mca_pml_csum_recv_request_t*)opal_list_get_first(queue);
}
static inline mca_pml_csum_recv_request_t* get_next_posted_recv(
opal_list_t *queue,
mca_pml_csum_recv_request_t* req)
{
opal_list_item_t *i = opal_list_get_next((opal_list_item_t*)req);
if(opal_list_get_end(queue) == i)
return NULL;
return (mca_pml_csum_recv_request_t*)i;
}
static mca_pml_csum_recv_request_t *match_incoming(
mca_pml_csum_match_hdr_t *hdr, mca_pml_csum_comm_t *comm,
mca_pml_csum_comm_proc_t *proc)
{
mca_pml_csum_recv_request_t *specific_recv, *wild_recv;
mca_pml_sequence_t wild_recv_seq, specific_recv_seq;
int tag = hdr->hdr_tag;
specific_recv = get_posted_recv(&proc->specific_receives);
wild_recv = get_posted_recv(&comm->wild_receives);
wild_recv_seq = wild_recv ?
wild_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ;
specific_recv_seq = specific_recv ?
specific_recv->req_recv.req_base.req_sequence : PML_MAX_SEQ;
/* they are equal only if both are PML_MAX_SEQ */
while(wild_recv_seq != specific_recv_seq) {
mca_pml_csum_recv_request_t **match;
opal_list_t *queue;
int req_tag;
mca_pml_sequence_t *seq;
if (OPAL_UNLIKELY(wild_recv_seq < specific_recv_seq)) {
match = &wild_recv;
queue = &comm->wild_receives;
seq = &wild_recv_seq;
} else {
match = &specific_recv;
queue = &proc->specific_receives;
seq = &specific_recv_seq;
}
req_tag = (*match)->req_recv.req_base.req_tag;
if(req_tag == tag || (req_tag == OMPI_ANY_TAG && tag >= 0)) {
opal_list_remove_item(queue, (opal_list_item_t*)(*match));
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_REMOVE_FROM_POSTED_Q,
&((*match)->req_recv.req_base), PERUSE_RECV);
return *match;
}
*match = get_next_posted_recv(queue, *match);
*seq = (*match) ? (*match)->req_recv.req_base.req_sequence : PML_MAX_SEQ;
}
return NULL;
}
static mca_pml_csum_recv_request_t*
match_one(mca_btl_base_module_t *btl,
mca_pml_csum_match_hdr_t *hdr, mca_btl_base_segment_t* segments,
size_t num_segments, ompi_communicator_t *comm_ptr,
mca_pml_csum_comm_proc_t *proc,
mca_pml_csum_recv_frag_t* frag)
{
mca_pml_csum_recv_request_t *match;
mca_pml_csum_comm_t *comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm;
do {
        match = match_incoming(hdr, comm, proc);
/* if match found, process data */
if(OPAL_LIKELY(NULL != match)) {
match->req_recv.req_base.req_proc = proc->ompi_proc;
if(OPAL_UNLIKELY(MCA_PML_REQUEST_PROBE == match->req_recv.req_base.req_type)) {
/* complete the probe */
mca_pml_csum_recv_request_matched_probe(match, btl, segments,
num_segments);
/* attempt to match actual request */
continue;
}
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_MSG_MATCH_POSTED_REQ,
&(match->req_recv.req_base), PERUSE_RECV);
return match;
}
/* if no match found, place on unexpected queue */
append_frag_to_list(&proc->unexpected_frags, btl, hdr, segments,
num_segments, frag);
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
return NULL;
} while(true);
}
static mca_pml_csum_recv_frag_t* check_cantmatch_for_match(mca_pml_csum_comm_proc_t *proc)
{
mca_pml_csum_recv_frag_t *frag;
/* search the list for a fragment from the send with sequence
* number next_msg_seq_expected
*/
for(frag = (mca_pml_csum_recv_frag_t*)opal_list_get_first(&proc->frags_cant_match);
frag != (mca_pml_csum_recv_frag_t*)opal_list_get_end(&proc->frags_cant_match);
frag = (mca_pml_csum_recv_frag_t*)opal_list_get_next(frag))
{
mca_pml_csum_match_hdr_t* hdr = &frag->hdr.hdr_match;
/*
* If the message has the next expected seq from that proc...
*/
if(hdr->hdr_seq != proc->expected_sequence)
continue;
opal_list_remove_item(&proc->frags_cant_match, (opal_list_item_t*)frag);
return frag;
}
return NULL;
}
/**
 * RTS/CTS receive-side matching
*
* @param hdr list of parameters needed for matching
 *                    This list is also embedded in frag,
 *                    but this allows us to save a memory copy when
* a match is made in this routine. (IN)
* @param frag pointer to receive fragment which we want
* to match (IN/OUT). If a match is not made,
* hdr is copied to frag.
* @param match_made parameter indicating if we matched frag/
* hdr (OUT)
* @param additional_matches if a match is made with frag, we
* may be able to match fragments that previously
* have arrived out-of-order. If this is the
* case, the associated fragment descriptors are
* put on this list for further processing. (OUT)
*
* @return OMPI error code
*
 * This routine is used to try to match a newly arrived message fragment
* to pre-posted receives. The following assumptions are made
* - fragments are received out of order
 * - for long messages, i.e. more than one fragment, an RTS/CTS algorithm
* is used.
* - 2nd and greater fragments include a receive descriptor pointer
* - fragments may be dropped
* - fragments may be corrupt
* - this routine may be called simultaneously by more than one thread
*/
static int mca_pml_csum_recv_frag_match( mca_btl_base_module_t *btl,
mca_pml_csum_match_hdr_t *hdr,
mca_btl_base_segment_t* segments,
size_t num_segments,
int type)
{
/* local variables */
uint16_t next_msg_seq_expected, frag_msg_seq;
ompi_communicator_t *comm_ptr;
mca_pml_csum_recv_request_t *match = NULL;
mca_pml_csum_comm_t *comm;
mca_pml_csum_comm_proc_t *proc;
mca_pml_csum_recv_frag_t* frag = NULL;
/* communicator pointer */
comm_ptr = ompi_comm_lookup(hdr->hdr_ctx);
if(OPAL_UNLIKELY(NULL == comm_ptr)) {
        /* This is a special case. A message for a not-yet-existing
         * communicator can arrive. Instead of doing a matching we
         * will temporarily add it to a pending queue in the PML.
         * Later on, when the communicator is completely instantiated,
         * this pending queue will be searched and all matching fragments
         * moved to the right communicator.
         */
append_frag_to_list( &mca_pml_csum.non_existing_communicator_pending,
btl, hdr, segments, num_segments, NULL );
return OMPI_SUCCESS;
}
comm = (mca_pml_csum_comm_t *)comm_ptr->c_pml_comm;
/* source sequence number */
frag_msg_seq = hdr->hdr_seq;
proc = &comm->procs[hdr->hdr_src];
/**
* We generate the MSG_ARRIVED event as soon as the PML is aware of a matching
     * fragment arrival, independent of whether it is received in the correct
     * order or not. This allows the tools to figure out when messages are not
     * received in the correct order (e.g. with multiple network interfaces).
*/
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
/* get next expected message sequence number - if threaded
* run, lock to make sure that if another thread is processing
* a frag from the same message a match is made only once.
     * Also, this prevents other posted receives (for a pair of
     * endpoints) from being processed, and potentially "losing"
* the fragment.
*/
OPAL_THREAD_LOCK(&comm->matching_lock);
/* get sequence number of next message that can be processed */
next_msg_seq_expected = (uint16_t)proc->expected_sequence;
if(OPAL_UNLIKELY(frag_msg_seq != next_msg_seq_expected))
goto wrong_seq;
/*
* This is the sequence number we were expecting,
* so we can try matching it to already posted
* receives.
*/
out_of_order_match:
/* We're now expecting the next sequence number. */
proc->expected_sequence++;
/**
* We generate the SEARCH_POSTED_QUEUE only when the message is received
* in the correct sequence. Otherwise, we delay the event generation until
* we reach the correct sequence number.
*/
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_BEGIN, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
match = match_one(btl, hdr, segments, num_segments, comm_ptr, proc, frag);
/**
* The match is over. We generate the SEARCH_POSTED_Q_END here, before going
* into the mca_pml_csum_check_cantmatch_for_match so we can make a difference
* for the searching time for all messages.
*/
PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_SEARCH_POSTED_Q_END, comm_ptr,
hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
/* release matching lock before processing fragment */
OPAL_THREAD_UNLOCK(&comm->matching_lock);
if(OPAL_LIKELY(match)) {
switch(type) {
case MCA_PML_CSUM_HDR_TYPE_MATCH:
mca_pml_csum_recv_request_progress_match(match, btl, segments, num_segments);
break;
case MCA_PML_CSUM_HDR_TYPE_RNDV:
mca_pml_csum_recv_request_progress_rndv(match, btl, segments, num_segments);
break;
case MCA_PML_CSUM_HDR_TYPE_RGET:
mca_pml_csum_recv_request_progress_rget(match, btl, segments, num_segments);
break;
}
if(OPAL_UNLIKELY(frag))
MCA_PML_CSUM_RECV_FRAG_RETURN(frag);
}
/*
     * Now that a new message has arrived, check to see if
     * any fragments on the frags_cant_match list
     * may now be used to form new matches
*/
if(OPAL_UNLIKELY(opal_list_get_size(&proc->frags_cant_match) > 0)) {
OPAL_THREAD_LOCK(&comm->matching_lock);
if((frag = check_cantmatch_for_match(proc))) {
hdr = &frag->hdr.hdr_match;
segments = frag->segments;
num_segments = frag->num_segments;
btl = frag->btl;
type = hdr->hdr_common.hdr_type;
goto out_of_order_match;
}
OPAL_THREAD_UNLOCK(&comm->matching_lock);
}
return OMPI_SUCCESS;
wrong_seq:
/*
* This message comes after the next expected, so it
* is ahead of sequence. Save it for later.
*/
append_frag_to_list(&proc->frags_cant_match, btl, hdr, segments,
num_segments, NULL);
OPAL_THREAD_UNLOCK(&comm->matching_lock);
return OMPI_SUCCESS;
}


@ -1,175 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2009 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef MCA_PML_CSUM_RECVFRAG_H
#define MCA_PML_CSUM_RECVFRAG_H
#include "ompi/mca/btl/btl.h"
#include "pml_csum_hdr.h"
BEGIN_C_DECLS
struct mca_pml_csum_buffer_t {
size_t len;
void * addr;
};
typedef struct mca_pml_csum_buffer_t mca_pml_csum_buffer_t;
struct mca_pml_csum_recv_frag_t {
ompi_free_list_item_t super;
mca_pml_csum_hdr_t hdr;
size_t num_segments;
mca_btl_base_module_t* btl;
mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS];
mca_pml_csum_buffer_t buffers[MCA_BTL_DES_MAX_SEGMENTS];
unsigned char addr[1];
};
typedef struct mca_pml_csum_recv_frag_t mca_pml_csum_recv_frag_t;
OBJ_CLASS_DECLARATION(mca_pml_csum_recv_frag_t);
#define MCA_PML_CSUM_RECV_FRAG_ALLOC(frag,rc) \
do { \
ompi_free_list_item_t* item; \
OMPI_FREE_LIST_WAIT(&mca_pml_csum.recv_frags, item, rc); \
frag = (mca_pml_csum_recv_frag_t*)item; \
} while(0)
#define MCA_PML_CSUM_RECV_FRAG_INIT(frag, hdr, segs, cnt, btl ) \
do { \
size_t i, _size; \
mca_btl_base_segment_t* macro_segments = frag->segments; \
mca_pml_csum_buffer_t* buffers = frag->buffers; \
unsigned char* _ptr = (unsigned char*)frag->addr; \
/* init recv_frag */ \
frag->btl = btl; \
frag->hdr = *(mca_pml_csum_hdr_t*)hdr; \
frag->num_segments = 1; \
_size = segs[0].seg_len; \
for( i = 1; i < cnt; i++ ) { \
_size += segs[i].seg_len; \
} \
/* copy over data */ \
if(_size <= mca_pml_csum.unexpected_limit ) { \
macro_segments[0].seg_addr.pval = frag->addr; \
} else { \
buffers[0].len = _size; \
buffers[0].addr = (char*) \
mca_pml_csum.allocator->alc_alloc( mca_pml_csum.allocator, \
buffers[0].len, \
0, NULL); \
_ptr = (unsigned char*)(buffers[0].addr); \
macro_segments[0].seg_addr.pval = buffers[0].addr; \
} \
macro_segments[0].seg_len = _size; \
for( i = 0; i < cnt; i++ ) { \
memcpy( _ptr, segs[i].seg_addr.pval, segs[i].seg_len); \
_ptr += segs[i].seg_len; \
} \
} while(0)
#define MCA_PML_CSUM_RECV_FRAG_RETURN(frag) \
do { \
if( frag->segments[0].seg_len > mca_pml_csum.unexpected_limit ) { \
/* return buffers */ \
mca_pml_csum.allocator->alc_free( mca_pml_csum.allocator, \
frag->buffers[0].addr ); \
} \
frag->num_segments = 0; \
\
/* return recv_frag */ \
OMPI_FREE_LIST_RETURN(&mca_pml_csum.recv_frags, \
(ompi_free_list_item_t*)frag); \
} while(0)
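/*
 * Life-cycle sketch (illustrative only; the locals are hypothetical): how
 * the three macros above cooperate on the unexpected-fragment path.
 */
#if 0
mca_pml_csum_recv_frag_t *frag;
int rc;
MCA_PML_CSUM_RECV_FRAG_ALLOC(frag, rc);                              /* take from free list */
MCA_PML_CSUM_RECV_FRAG_INIT(frag, hdr, segments, num_segments, btl); /* copy data in */
/* ... frag sits on a queue until a matching receive is posted ... */
MCA_PML_CSUM_RECV_FRAG_RETURN(frag);                                 /* give buffers back */
#endif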
/**
* Callback from BTL on receipt of a recv_frag (match).
*/
extern void mca_pml_csum_recv_frag_callback_match( mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata );
/**
* Callback from BTL on receipt of a recv_frag (rndv).
*/
extern void mca_pml_csum_recv_frag_callback_rndv( mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata );
/**
* Callback from BTL on receipt of a recv_frag (rget).
*/
extern void mca_pml_csum_recv_frag_callback_rget( mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata );
/**
* Callback from BTL on receipt of a recv_frag (ack).
*/
extern void mca_pml_csum_recv_frag_callback_ack( mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata );
/**
* Callback from BTL on receipt of a recv_frag (frag).
*/
extern void mca_pml_csum_recv_frag_callback_frag( mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata );
/**
* Callback from BTL on receipt of a recv_frag (put).
*/
extern void mca_pml_csum_recv_frag_callback_put( mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata );
/**
* Callback from BTL on receipt of a recv_frag (fin).
*/
extern void mca_pml_csum_recv_frag_callback_fin( mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor,
void* cbdata );
END_C_DECLS
#endif

The diff for this file is not shown because of its large size.


@ -1,425 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2010 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*/
#ifndef OMPI_PML_CSUM_RECV_REQUEST_H
#define OMPI_PML_CSUM_RECV_REQUEST_H
#include "pml_csum.h"
#include "pml_csum_rdma.h"
#include "pml_csum_rdmafrag.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/pml/csum/pml_csum_comm.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/pml/base/pml_base_recvreq.h"
BEGIN_C_DECLS
struct mca_pml_csum_recv_request_t {
mca_pml_base_recv_request_t req_recv;
ompi_ptr_t remote_req_send;
int32_t req_lock;
size_t req_pipeline_depth;
size_t req_bytes_received; /**< amount of data transferred into the user buffer */
size_t req_bytes_expected; /**< local size of the data as suggested by the user */
size_t req_rdma_offset;
size_t req_send_offset;
uint32_t req_rdma_cnt;
uint32_t req_rdma_idx;
bool req_pending;
bool req_ack_sent; /**< whether ack was sent to the sender */
    bool req_match_received;   /**< Prevents the request from being completed prematurely */
opal_mutex_t lock;
mca_pml_csum_com_btl_t req_rdma[1];
};
typedef struct mca_pml_csum_recv_request_t mca_pml_csum_recv_request_t;
OBJ_CLASS_DECLARATION(mca_pml_csum_recv_request_t);
static inline bool lock_recv_request(mca_pml_csum_recv_request_t *recvreq)
{
return OPAL_THREAD_ADD32(&recvreq->req_lock, 1) == 1;
}
static inline bool unlock_recv_request(mca_pml_csum_recv_request_t *recvreq)
{
return OPAL_THREAD_ADD32(&recvreq->req_lock, -1) == 0;
}
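
/*
 * Minimal self-contained sketch (hypothetical names, assumed semantics) of
 * the counting-lock protocol used by lock_recv_request/unlock_recv_request:
 * the caller that raises the counter from 0 to 1 owns the scheduling loop;
 * concurrent callers merely bump the counter, and the owner keeps looping
 * until its decrement brings the counter back to 0, so no scheduling request
 * is lost and no caller ever blocks.
 */
#include <stdatomic.h>
#include <stdbool.h>

static bool toy_lock(atomic_int *req_lock)
{
    return atomic_fetch_add(req_lock, 1) + 1 == 1;   /* new value == 1? */
}

static bool toy_unlock(atomic_int *req_lock)
{
    return atomic_fetch_add(req_lock, -1) - 1 == 0;  /* new value == 0? */
}

static void toy_schedule(atomic_int *req_lock)
{
    if (!toy_lock(req_lock))
        return;                      /* another thread is already scheduling */
    do {
        /* ... one pass of the scheduling logic ... */
    } while (!toy_unlock(req_lock)); /* absorb calls that arrived meanwhile */
}
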
/**
* Allocate a recv request from the modules free list.
*
* @param rc (OUT) OMPI_SUCCESS or error status on failure.
* @return Receive request.
*/
#define MCA_PML_CSUM_RECV_REQUEST_ALLOC(recvreq, rc) \
do { \
ompi_free_list_item_t* item; \
rc = OMPI_SUCCESS; \
OMPI_FREE_LIST_GET(&mca_pml_base_recv_requests, item, rc); \
recvreq = (mca_pml_csum_recv_request_t*)item; \
} while(0)
/**
* Initialize a receive request with call parameters.
*
* @param request (IN) Receive request.
* @param addr (IN) User buffer.
* @param count (IN) Number of elements of indicated datatype.
* @param datatype (IN) User defined datatype.
* @param src (IN) Source rank w/in the communicator.
* @param tag (IN) User defined tag.
* @param comm (IN) Communicator.
 * @param persistent (IN) Is this a persistent request.
*/
#define MCA_PML_CSUM_RECV_REQUEST_INIT( request, \
addr, \
count, \
datatype, \
src, \
tag, \
comm, \
persistent) \
do { \
MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv, \
addr, \
count, \
datatype, \
src, \
tag, \
comm, \
persistent); \
} while(0)
/**
* Mark the request as completed at MPI level for internal purposes.
*
* @param recvreq (IN) Receive request.
*/
#define MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE( recvreq ) \
do { \
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
&(recvreq->req_recv.req_base), PERUSE_RECV ); \
ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \
} while (0)
/*
* Free the PML receive request
*/
#define MCA_PML_CSUM_RECV_REQUEST_RETURN(recvreq) \
{ \
MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \
OMPI_FREE_LIST_RETURN( &mca_pml_base_recv_requests, \
(ompi_free_list_item_t*)(recvreq)); \
}
/**
 * Complete a receive request. The request structure can no longer be
 * accessed after this function is called.
*
* @param recvreq (IN) Receive request.
*/
static inline void
recv_request_pml_complete(mca_pml_csum_recv_request_t *recvreq)
{
size_t i;
assert(false == recvreq->req_recv.req_base.req_pml_complete);
if(recvreq->req_recv.req_bytes_packed > 0) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
&recvreq->req_recv.req_base, PERUSE_RECV );
}
for(i = 0; i < recvreq->req_rdma_cnt; i++) {
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[i].btl_reg;
if( NULL != btl_reg && btl_reg->mpool != NULL) {
btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg );
}
}
recvreq->req_rdma_cnt = 0;
OPAL_THREAD_LOCK(&ompi_request_lock);
if(true == recvreq->req_recv.req_base.req_free_called) {
MCA_PML_CSUM_RECV_REQUEST_RETURN(recvreq);
} else {
/* initialize request status */
recvreq->req_recv.req_base.req_pml_complete = true;
recvreq->req_recv.req_base.req_ompi.req_status._ucount =
recvreq->req_bytes_received;
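        /* A sender may deliver more data than the posted receive can hold:
         * report the packed (sent) size in the status and flag the request
         * with MPI_ERR_TRUNCATE, per MPI semantics. */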
if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) {
recvreq->req_recv.req_base.req_ompi.req_status._ucount =
recvreq->req_recv.req_bytes_packed;
recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR =
MPI_ERR_TRUNCATE;
}
MCA_PML_CSUM_RECV_REQUEST_MPI_COMPLETE(recvreq);
}
OPAL_THREAD_UNLOCK(&ompi_request_lock);
}
static inline bool
recv_request_pml_complete_check(mca_pml_csum_recv_request_t *recvreq)
{
#if OPAL_ENABLE_MULTI_THREADS
opal_atomic_rmb();
#endif
if(recvreq->req_match_received &&
recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed &&
lock_recv_request(recvreq)) {
recv_request_pml_complete(recvreq);
return true;
}
return false;
}
extern void mca_pml_csum_recv_req_start(mca_pml_csum_recv_request_t *req);
#define MCA_PML_CSUM_RECV_REQUEST_START(r) mca_pml_csum_recv_req_start(r)
static inline void prepare_recv_req_converter(mca_pml_csum_recv_request_t *req)
{
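    /* The bitwise OR is a branch-free test that is non-zero whenever either
     * the datatype size or the element count is non-zero; convertor setup is
     * skipped only for truly empty receives. */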
if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) {
opal_convertor_copy_and_prepare_for_recv(
req->req_recv.req_base.req_proc->proc_convertor,
&(req->req_recv.req_base.req_datatype->super),
req->req_recv.req_base.req_count,
req->req_recv.req_base.req_addr,
0,
&req->req_recv.req_base.req_convertor);
opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor,
&req->req_bytes_expected);
}
}
#define MCA_PML_CSUM_RECV_REQUEST_MATCHED(request, hdr) \
recv_req_matched(request, hdr)
static inline void recv_req_matched(mca_pml_csum_recv_request_t *req,
mca_pml_csum_match_hdr_t *hdr)
{
req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;
req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
req->req_match_received = true;
#if OPAL_ENABLE_MULTI_THREADS
opal_atomic_wmb();
#endif
if(req->req_recv.req_bytes_packed > 0) {
if(MPI_ANY_SOURCE == req->req_recv.req_base.req_peer) {
            /* A wildcard source is only known at match time; non-wildcard
             * receives had their convertor prepared when the receive was
             * posted. */
prepare_recv_req_converter(req);
}
PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN,
&req->req_recv.req_base, PERUSE_RECV);
}
}
/**
 * Unpack received segment data into the user buffer: skip seg_offset bytes
 * of the segments, position the convertor at data_offset, and deliver up to
 * bytes_received bytes, reporting the amount actually delivered.
 */
#define MCA_PML_CSUM_RECV_REQUEST_UNPACK( request, \
segments, \
num_segments, \
seg_offset, \
data_offset, \
bytes_received, \
bytes_delivered) \
do { \
bytes_delivered = 0; \
if(request->req_recv.req_bytes_packed > 0) { \
struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS]; \
uint32_t iov_count = 0; \
size_t max_data = bytes_received; \
size_t n, offset = seg_offset; \
mca_btl_base_segment_t* segment = segments; \
\
OPAL_THREAD_LOCK(&request->lock); \
for( n = 0; n < num_segments; n++, segment++ ) { \
if(offset >= segment->seg_len) { \
offset -= segment->seg_len; \
} else { \
iov[iov_count].iov_len = segment->seg_len - offset; \
iov[iov_count].iov_base = (IOVBASE_TYPE*) \
((unsigned char*)segment->seg_addr.pval + offset); \
iov_count++; \
offset = 0; \
} \
} \
PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE, \
                                 &(request->req_recv.req_base), max_data,    \
PERUSE_RECV); \
opal_convertor_set_position( &(request->req_recv.req_base.req_convertor), \
&data_offset ); \
opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor, \
iov, \
&iov_count, \
&max_data ); \
bytes_delivered = max_data; \
OPAL_THREAD_UNLOCK(&request->lock); \
} \
} while (0)
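
/*
 * Self-contained sketch (hypothetical names) of the iovec construction inside
 * MCA_PML_CSUM_RECV_REQUEST_UNPACK: walk the received segments, skip `offset`
 * bytes from the front, and emit one iovec entry for the tail of every
 * remaining segment; the convertor then unpacks from that iovec list.
 */
#include <stddef.h>
#include <sys/uio.h>

static size_t build_iov(void *const *seg_base, const size_t *seg_len,
                        size_t nsegs, size_t offset, struct iovec *iov)
{
    size_t n, count = 0;
    for (n = 0; n < nsegs; n++) {
        if (offset >= seg_len[n]) {
            offset -= seg_len[n];    /* this segment lies before the data */
        } else {
            iov[count].iov_base = (char *)seg_base[n] + offset;
            iov[count].iov_len  = seg_len[n] - offset;
            count++;
            offset = 0;              /* later segments are used in full */
        }
    }
    return count;                    /* number of iovec entries produced */
}
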
/**
 * Progress a receive request whose match (eager) header has arrived.
 */
void mca_pml_csum_recv_request_progress_match(
mca_pml_csum_recv_request_t* req,
struct mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments);
/**
 * Progress a receive request as additional payload fragments arrive.
 */
void mca_pml_csum_recv_request_progress_frag(
mca_pml_csum_recv_request_t* req,
struct mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments);
/**
 * Progress a receive request on receipt of a rendezvous header.
 */
void mca_pml_csum_recv_request_progress_rndv(
mca_pml_csum_recv_request_t* req,
struct mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments);
/**
 * Progress a receive request on receipt of an RDMA get (rget) header.
 */
void mca_pml_csum_recv_request_progress_rget(
mca_pml_csum_recv_request_t* req,
struct mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments);
/**
 * Complete a matched probe without consuming the message payload.
 */
void mca_pml_csum_recv_request_matched_probe(
mca_pml_csum_recv_request_t* req,
struct mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments,
size_t num_segments);
/**
 * Run one pass of receive-side scheduling (RDMA put) for the request.
 */
int mca_pml_csum_recv_request_schedule_once(
mca_pml_csum_recv_request_t* req, mca_bml_base_btl_t* start_bml_btl);
static inline int mca_pml_csum_recv_request_schedule_exclusive(
mca_pml_csum_recv_request_t* req,
mca_bml_base_btl_t* start_bml_btl)
{
int rc;
do {
rc = mca_pml_csum_recv_request_schedule_once(req, start_bml_btl);
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
break;
} while(!unlock_recv_request(req));
if(OMPI_SUCCESS == rc)
recv_request_pml_complete_check(req);
return rc;
}
static inline void mca_pml_csum_recv_request_schedule(
mca_pml_csum_recv_request_t* req,
mca_bml_base_btl_t* start_bml_btl)
{
if(!lock_recv_request(req))
return;
(void)mca_pml_csum_recv_request_schedule_exclusive(req, start_bml_btl);
}
#define MCA_PML_CSUM_ADD_ACK_TO_PENDING(P, S, D, O) \
do { \
mca_pml_csum_pckt_pending_t *_pckt; \
int _rc; \
\
MCA_PML_CSUM_PCKT_PENDING_ALLOC(_pckt,_rc); \
_pckt->hdr.hdr_common.hdr_type = MCA_PML_CSUM_HDR_TYPE_ACK; \
_pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \
_pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \
_pckt->hdr.hdr_ack.hdr_send_offset = (O); \
_pckt->proc = (P); \
_pckt->bml_btl = NULL; \
OPAL_THREAD_LOCK(&mca_pml_csum.lock); \
opal_list_append(&mca_pml_csum.pckt_pending, \
(opal_list_item_t*)_pckt); \
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock); \
} while(0)
int mca_pml_csum_recv_request_ack_send_btl(ompi_proc_t* proc,
mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
uint64_t hdr_rdma_offset, bool nordma);
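/* Try each eager BTL in round-robin order until one accepts the ACK; if none
 * has resources available, queue the ACK on the PML's pending list so it is
 * retried later, and report OMPI_ERR_OUT_OF_RESOURCE to the caller. */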
static inline int mca_pml_csum_recv_request_ack_send(ompi_proc_t* proc,
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
bool nordma)
{
size_t i;
mca_bml_base_btl_t* bml_btl;
mca_bml_base_endpoint_t* endpoint =
(mca_bml_base_endpoint_t*)proc->proc_bml;
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
if(mca_pml_csum_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS)
return OMPI_SUCCESS;
}
MCA_PML_CSUM_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,
hdr_send_offset);
return OMPI_ERR_OUT_OF_RESOURCE;
}
int mca_pml_csum_recv_request_get_frag(mca_pml_csum_rdma_frag_t* frag);
/* This function tries to continue receive requests that are stuck due to
 * resource unavailability. A recvreq is added to the recv_pending list when
 * the scheduling of a put operation cannot be accomplished for some reason. */
void mca_pml_csum_recv_request_process_pending(void);
END_C_DECLS
#endif

The diff for this file is not shown because it is too large.

View file

@@ -1,470 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2010 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2009-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_PML_CSUM_SEND_REQUEST_H
#define OMPI_PML_CSUM_SEND_REQUEST_H
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/pml/base/pml_base_sendreq.h"
#include "ompi/mca/mpool/base/base.h"
#include "pml_csum_comm.h"
#include "pml_csum_hdr.h"
#include "pml_csum_rdma.h"
#include "pml_csum_rdmafrag.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/bml/bml.h"
BEGIN_C_DECLS
typedef enum {
MCA_PML_CSUM_SEND_PENDING_NONE,
MCA_PML_CSUM_SEND_PENDING_SCHEDULE,
MCA_PML_CSUM_SEND_PENDING_START
} mca_pml_csum_send_pending_t;
struct mca_pml_csum_send_request_t {
mca_pml_base_send_request_t req_send;
mca_bml_base_endpoint_t* req_endpoint;
ompi_ptr_t req_recv;
int32_t req_state;
int32_t req_lock;
bool req_throttle_sends;
size_t req_pipeline_depth;
size_t req_bytes_delivered;
uint32_t req_rdma_cnt;
mca_pml_csum_send_pending_t req_pending;
opal_mutex_t req_send_range_lock;
opal_list_t req_send_ranges;
mca_pml_csum_com_btl_t req_rdma[1];
};
typedef struct mca_pml_csum_send_request_t mca_pml_csum_send_request_t;
OBJ_CLASS_DECLARATION(mca_pml_csum_send_request_t);
struct mca_pml_csum_send_range_t {
ompi_free_list_item_t base;
uint64_t range_send_offset;
uint64_t range_send_length;
int range_btl_idx;
int range_btl_cnt;
mca_pml_csum_com_btl_t range_btls[1];
};
typedef struct mca_pml_csum_send_range_t mca_pml_csum_send_range_t;
OBJ_CLASS_DECLARATION(mca_pml_csum_send_range_t);
static inline bool lock_send_request(mca_pml_csum_send_request_t *sendreq)
{
return OPAL_THREAD_ADD32(&sendreq->req_lock, 1) == 1;
}
static inline bool unlock_send_request(mca_pml_csum_send_request_t *sendreq)
{
return OPAL_THREAD_ADD32(&sendreq->req_lock, -1) == 0;
}
static inline void
add_request_to_send_pending(mca_pml_csum_send_request_t* sendreq,
const mca_pml_csum_send_pending_t type,
const bool append)
{
opal_list_item_t *item = (opal_list_item_t*)sendreq;
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
sendreq->req_pending = type;
if(append)
opal_list_append(&mca_pml_csum.send_pending, item);
else
opal_list_prepend(&mca_pml_csum.send_pending, item);
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
}
static inline mca_pml_csum_send_request_t*
get_request_from_send_pending(mca_pml_csum_send_pending_t *type)
{
mca_pml_csum_send_request_t *sendreq;
OPAL_THREAD_LOCK(&mca_pml_csum.lock);
sendreq = (mca_pml_csum_send_request_t*)
opal_list_remove_first(&mca_pml_csum.send_pending);
if(sendreq) {
*type = sendreq->req_pending;
sendreq->req_pending = MCA_PML_CSUM_SEND_PENDING_NONE;
}
OPAL_THREAD_UNLOCK(&mca_pml_csum.lock);
return sendreq;
}
#define MCA_PML_CSUM_SEND_REQUEST_ALLOC( comm, \
dst, \
sendreq, \
rc) \
{ \
ompi_proc_t *proc = ompi_comm_peer_lookup( comm, dst ); \
ompi_free_list_item_t* item; \
\
rc = OMPI_ERR_OUT_OF_RESOURCE; \
if( OPAL_LIKELY(NULL != proc) ) { \
rc = OMPI_SUCCESS; \
OMPI_FREE_LIST_WAIT(&mca_pml_base_send_requests, item, rc); \
sendreq = (mca_pml_csum_send_request_t*)item; \
sendreq->req_send.req_base.req_proc = proc; \
} \
}
#define MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq, \
buf, \
count, \
datatype, \
dst, \
tag, \
comm, \
sendmode, \
persistent) \
{ \
MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
buf, \
count, \
datatype, \
dst, \
tag, \
comm, \
sendmode, \
persistent, \
0); /* convertor_flags */ \
(sendreq)->req_recv.pval = NULL; \
}
static inline void mca_pml_csum_free_rdma_resources(mca_pml_csum_send_request_t* sendreq)
{
size_t r;
/* return mpool resources */
for(r = 0; r < sendreq->req_rdma_cnt; r++) {
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg;
if( NULL != reg && reg->mpool != NULL ) {
reg->mpool->mpool_deregister(reg->mpool, reg);
}
}
sendreq->req_rdma_cnt = 0;
}
/**
* Start a send request.
*/
#define MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc) \
do { \
rc = mca_pml_csum_send_request_start(sendreq); \
} while (0)
/*
* Mark a send request as completed at the MPI level.
*/
#define MCA_PML_CSUM_SEND_REQUEST_MPI_COMPLETE(sendreq, with_signal) \
do { \
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_SOURCE = \
(sendreq)->req_send.req_base.req_comm->c_my_rank; \
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_TAG = \
(sendreq)->req_send.req_base.req_tag; \
(sendreq)->req_send.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS; \
(sendreq)->req_send.req_base.req_ompi.req_status._ucount = \
(sendreq)->req_send.req_bytes_packed; \
ompi_request_complete( &((sendreq)->req_send.req_base.req_ompi), (with_signal) ); \
\
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE, \
&(sendreq->req_send.req_base), PERUSE_SEND); \
} while(0)
/*
* Release resources associated with a request
*/
#define MCA_PML_CSUM_SEND_REQUEST_RETURN(sendreq) \
do { \
/* Let the base handle the reference counts */ \
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \
OMPI_FREE_LIST_RETURN( &mca_pml_base_send_requests, \
(ompi_free_list_item_t*)sendreq); \
} while(0)
/*
* The PML has completed a send request. Note that this request
* may have been orphaned by the user or have already completed
* at the MPI level.
* This function will never be called directly from the upper level, as it
* should only be an internal call to the PML.
*
*/
static inline void
send_request_pml_complete(mca_pml_csum_send_request_t *sendreq)
{
assert(false == sendreq->req_send.req_base.req_pml_complete);
if(sendreq->req_send.req_bytes_packed > 0) {
PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
&(sendreq->req_send.req_base), PERUSE_SEND);
}
/* return mpool resources */
mca_pml_csum_free_rdma_resources(sendreq);
if (sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED &&
sendreq->req_send.req_addr != sendreq->req_send.req_base.req_addr) {
mca_pml_base_bsend_request_fini((ompi_request_t*)sendreq);
}
OPAL_THREAD_LOCK(&ompi_request_lock);
if(false == sendreq->req_send.req_base.req_ompi.req_complete) {
        /* Only reached when the request has not yet completed at the MPI
         * level, which happens for long (and synchronous) sends. */
MCA_PML_CSUM_SEND_REQUEST_MPI_COMPLETE(sendreq, true);
}
sendreq->req_send.req_base.req_pml_complete = true;
if(sendreq->req_send.req_base.req_free_called) {
MCA_PML_CSUM_SEND_REQUEST_RETURN(sendreq);
}
OPAL_THREAD_UNLOCK(&ompi_request_lock);
}
/* returns true if request was completed on PML level */
static inline bool
send_request_pml_complete_check(mca_pml_csum_send_request_t *sendreq)
{
#if OPAL_ENABLE_MULTI_THREADS
opal_atomic_rmb();
#endif
    /* If no more events are expected for the request, the whole message has
     * already been sent, and fragment scheduling is not running in another
     * thread, then complete the request at the PML level. From that point on,
     * if the user has freed the request, its structure can be reused for
     * another request; if the request is persistent, it can be restarted. */
if(sendreq->req_state == 0 &&
sendreq->req_bytes_delivered >= sendreq->req_send.req_bytes_packed
&& lock_send_request(sendreq)) {
send_request_pml_complete(sendreq);
return true;
}
return false;
}
/**
* Schedule additional fragments
*/
int
mca_pml_csum_send_request_schedule_once(mca_pml_csum_send_request_t*);
static inline int
mca_pml_csum_send_request_schedule_exclusive(mca_pml_csum_send_request_t* sendreq)
{
int rc;
do {
rc = mca_pml_csum_send_request_schedule_once(sendreq);
if(rc == OMPI_ERR_OUT_OF_RESOURCE)
break;
} while(!unlock_send_request(sendreq));
if(OMPI_SUCCESS == rc)
send_request_pml_complete_check(sendreq);
return rc;
}
static inline void
mca_pml_csum_send_request_schedule(mca_pml_csum_send_request_t* sendreq)
{
/*
* Only allow one thread in this routine for a given request.
* However, we cannot block callers on a mutex, so simply keep track
* of the number of times the routine has been called and run through
* the scheduling logic once for every call.
*/
if(!lock_send_request(sendreq))
return;
mca_pml_csum_send_request_schedule_exclusive(sendreq);
}
/**
* Start the specified request
*/
int mca_pml_csum_send_request_start_buffered(
mca_pml_csum_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size);
int mca_pml_csum_send_request_start_copy(
mca_pml_csum_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size);
int mca_pml_csum_send_request_start_prepare(
mca_pml_csum_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size);
int mca_pml_csum_send_request_start_rdma(
mca_pml_csum_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size);
int mca_pml_csum_send_request_start_rndv(
mca_pml_csum_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl,
size_t size,
int flags);
static inline int
mca_pml_csum_send_request_start_btl( mca_pml_csum_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl )
{
size_t size = sendreq->req_send.req_bytes_packed;
mca_btl_base_module_t* btl = bml_btl->btl;
size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_csum_hdr_t);
int rc;
if( OPAL_LIKELY(size <= eager_limit) ) {
switch(sendreq->req_send.req_send_mode) {
case MCA_PML_BASE_SEND_SYNCHRONOUS:
rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size, 0);
break;
case MCA_PML_BASE_SEND_BUFFERED:
rc = mca_pml_csum_send_request_start_copy(sendreq, bml_btl, size);
break;
case MCA_PML_BASE_SEND_COMPLETE:
rc = mca_pml_csum_send_request_start_prepare(sendreq, bml_btl, size);
break;
default:
if (size != 0 && bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) {
rc = mca_pml_csum_send_request_start_prepare(sendreq, bml_btl, size);
} else {
rc = mca_pml_csum_send_request_start_copy(sendreq, bml_btl, size);
}
break;
}
} else {
size = eager_limit;
if(OPAL_UNLIKELY(btl->btl_rndv_eager_limit < eager_limit))
size = btl->btl_rndv_eager_limit;
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {
rc = mca_pml_csum_send_request_start_buffered(sendreq, bml_btl, size);
        } else if (false == opal_convertor_need_buffers(&sendreq->req_send.req_base.req_convertor)) {
unsigned char *base;
opal_convertor_get_current_pointer( &sendreq->req_send.req_base.req_convertor, (void**)&base );
if( 0 != (sendreq->req_rdma_cnt = (uint32_t)mca_pml_csum_rdma_btls(
sendreq->req_endpoint,
base,
sendreq->req_send.req_bytes_packed,
sendreq->req_rdma))) {
rc = mca_pml_csum_send_request_start_rdma(sendreq, bml_btl,
sendreq->req_send.req_bytes_packed);
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
mca_pml_csum_free_rdma_resources(sendreq);
}
} else {
rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size,
MCA_PML_CSUM_HDR_FLAGS_CONTIG);
}
} else {
rc = mca_pml_csum_send_request_start_rndv(sendreq, bml_btl, size, 0);
}
}
return rc;
}
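
/*
 * Schematic of the protocol selection implemented above (illustrative only,
 * not the original API; all names are hypothetical). Messages at or under
 * the eager limit are sent in a single fragment, with the exact start path
 * chosen by the MPI send mode; larger messages fall back to the buffered,
 * RDMA, or rendezvous protocols.
 */
enum toy_proto { EAGER_RNDV, EAGER_COPY, EAGER_PREPARE,
                 LARGE_BUFFERED, LARGE_RDMA, LARGE_RNDV };

static enum toy_proto choose_proto(size_t size, size_t eager_limit,
                                   int synchronous, int buffered,
                                   int contiguous, int rdma_capable)
{
    if (size <= eager_limit) {
        if (synchronous) return EAGER_RNDV;  /* needs an ACK from receiver */
        if (buffered)    return EAGER_COPY;  /* copy out of the user buffer */
        return EAGER_PREPARE;                /* send in place when the BTL
                                                supports it */
    }
    if (buffered)                   return LARGE_BUFFERED;
    if (contiguous && rdma_capable) return LARGE_RDMA;
    return LARGE_RNDV;                       /* pipelined rendezvous */
}
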
static inline int
mca_pml_csum_send_request_start( mca_pml_csum_send_request_t* sendreq )
{
mca_pml_csum_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm;
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*)
sendreq->req_send.req_base.req_proc->proc_bml;
size_t i;
if( OPAL_UNLIKELY(endpoint == NULL) ) {
return OMPI_ERR_UNREACH;
}
sendreq->req_endpoint = endpoint;
sendreq->req_state = 0;
sendreq->req_lock = 0;
sendreq->req_pipeline_depth = 0;
sendreq->req_bytes_delivered = 0;
sendreq->req_pending = MCA_PML_CSUM_SEND_PENDING_NONE;
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1);
MCA_PML_BASE_SEND_START( &sendreq->req_send.req_base );
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
mca_bml_base_btl_t* bml_btl;
int rc;
/* select a btl */
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
rc = mca_pml_csum_send_request_start_btl(sendreq, bml_btl);
if( OPAL_LIKELY(OMPI_ERR_OUT_OF_RESOURCE != rc) )
return rc;
}
add_request_to_send_pending(sendreq, MCA_PML_CSUM_SEND_PENDING_START, true);
return OMPI_SUCCESS;
}
/**
* Initiate a put scheduled by the receiver.
*/
void mca_pml_csum_send_request_put( mca_pml_csum_send_request_t* sendreq,
mca_btl_base_module_t* btl,
mca_pml_csum_rdma_hdr_t* hdr );
int mca_pml_csum_send_request_put_frag(mca_pml_csum_rdma_frag_t* frag);
/* This function tries to continue send requests that were stuck because of
 * resource unavailability. A sendreq may be added to the send_pending list
 * if there is no resource available to send the initial packet or to
 * schedule data for sending. The reason the sendreq was added to the list is
 * stored inside the sendreq struct, and the appropriate operation is retried
 * when a resource becomes available. The bml_btl passed to this function
 * does not represent the sendreq destination; it represents the BTL on which
 * a resource was freed, so only this BTL should be considered for sending
 * packets. */
void mca_pml_csum_send_request_process_pending(mca_bml_base_btl_t *bml_btl);
void mca_pml_csum_send_request_copy_in_out(mca_pml_csum_send_request_t *sendreq,
uint64_t send_offset, uint64_t send_length);
END_C_DECLS
#endif /* OMPI_PML_CSUM_SEND_REQUEST_H */

View file

@@ -1,148 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "pml_csum.h"
#include "pml_csum_recvreq.h"
#include "pml_csum_sendreq.h"
#include "ompi/memchecker.h"
int mca_pml_csum_start(size_t count, ompi_request_t** requests)
{
int rc;
size_t i;
bool reuse_old_request = true;
for(i=0; i<count; i++) {
mca_pml_base_request_t *pml_request = (mca_pml_base_request_t*)requests[i];
if(NULL == pml_request) {
continue;
}
if (OMPI_REQUEST_PML != requests[i]->req_type) {
continue;
}
        /* If the persistent request is currently active, obtain the
         * request lock and verify that its status is incomplete. If the
         * PML layer has not completed the request, mark the request as
         * "free called" so that it is freed when it completes, and
         * create a new request.
         */
reuse_old_request = true;
switch(pml_request->req_ompi.req_state) {
case OMPI_REQUEST_INACTIVE:
if(pml_request->req_pml_complete == true)
break;
/* otherwise fall through */
case OMPI_REQUEST_ACTIVE: {
ompi_request_t *request;
OPAL_THREAD_LOCK(&ompi_request_lock);
if (pml_request->req_pml_complete == false) {
/* free request after it completes */
pml_request->req_free_called = true;
} else {
/* can reuse the existing request */
OPAL_THREAD_UNLOCK(&ompi_request_lock);
break;
}
reuse_old_request = false;
/* allocate a new request */
switch(pml_request->req_type) {
case MCA_PML_REQUEST_SEND: {
mca_pml_base_send_mode_t sendmode =
((mca_pml_base_send_request_t*)pml_request)->req_send_mode;
rc = mca_pml_csum_isend_init(
pml_request->req_addr,
pml_request->req_count,
pml_request->req_datatype,
pml_request->req_peer,
pml_request->req_tag,
sendmode,
pml_request->req_comm,
&request);
break;
}
case MCA_PML_REQUEST_RECV:
rc = mca_pml_csum_irecv_init(
pml_request->req_addr,
pml_request->req_count,
pml_request->req_datatype,
pml_request->req_peer,
pml_request->req_tag,
pml_request->req_comm,
&request);
break;
default:
rc = OMPI_ERR_REQUEST;
break;
}
OPAL_THREAD_UNLOCK(&ompi_request_lock);
if(OMPI_SUCCESS != rc)
return rc;
pml_request = (mca_pml_base_request_t*)request;
requests[i] = request;
break;
}
default:
return OMPI_ERR_REQUEST;
}
/* start the request */
switch(pml_request->req_type) {
case MCA_PML_REQUEST_SEND:
{
mca_pml_csum_send_request_t* sendreq = (mca_pml_csum_send_request_t*)pml_request;
MEMCHECKER(
memchecker_call(&opal_memchecker_base_isdefined,
pml_request->req_addr, pml_request->req_count,
pml_request->req_datatype);
);
if( reuse_old_request && (sendreq->req_send.req_bytes_packed != 0) ) {
size_t offset = 0;
                /**
                 * Reset the convertor in case we are reusing the original
                 * request, which does not reset its convertor upon
                 * completion.
                 */
opal_convertor_set_position( &sendreq->req_send.req_base.req_convertor,
&offset );
}
MCA_PML_CSUM_SEND_REQUEST_START(sendreq, rc);
if(rc != OMPI_SUCCESS)
return rc;
break;
}
case MCA_PML_REQUEST_RECV:
{
mca_pml_csum_recv_request_t* recvreq = (mca_pml_csum_recv_request_t*)pml_request;
MCA_PML_CSUM_RECV_REQUEST_START(recvreq);
break;
}
default:
return OMPI_ERR_REQUEST;
}
}
return OMPI_SUCCESS;
}
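
/*
 * Context (illustrative, standard MPI; toy_persistent_send is a hypothetical
 * helper): mca_pml_csum_start above is the PML backend invoked by
 * MPI_Start/MPI_Startall on persistent requests, e.g.:
 */
#include <mpi.h>

static void toy_persistent_send(int *buf, int n, int dest, int tag, int iters)
{
    MPI_Request req;
    int i;
    MPI_Send_init(buf, n, MPI_INT, dest, tag, MPI_COMM_WORLD, &req);
    for (i = 0; i < iters; i++) {
        MPI_Start(&req);                   /* reactivates the request; the PML
                                              resets the convertor and
                                              restarts the send */
        MPI_Wait(&req, MPI_STATUS_IGNORE);
    }
    MPI_Request_free(&req);
}
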

View file

@@ -1 +0,0 @@
DIRECT_CALL_HEADER="ompi/mca/pml/csum/pml_csum.h"