1
1
openmpi/ompi/mca/ptl/gm/ptl_gm_priv.c
Brian Barrett a13166b500 * rename ompi_output to opal_output
This commit was SVN r6329.
2005-07-03 23:31:27 +00:00

997 строки
48 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004 The Ohio State University.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "include/types.h"
#include "mca/ptl/base/ptl_base_sendreq.h"
#include "mca/ptl/base/ptl_base_header.h"
#include "ptl_gm.h"
#include "ptl_gm_peer.h"
#include "ptl_gm_proc.h"
#include "ptl_gm_sendfrag.h"
#include "ptl_gm_priv.h"
#include "mca/pml/teg/pml_teg_proc.h"
static int mca_ptl_gm_send_quick_fin_message( struct mca_ptl_gm_peer_t* ptl_peer,
struct mca_ptl_base_frag_t* frag );
/* GM completion callback for ordinary fragment sends.  The context is the
 * DMA header that was handed to gm_send_with_callback(); on success the
 * header buffer is returned to the module's DMA free list and the send
 * token it consumed is released.  Failures are only logged. */
static void mca_ptl_gm_basic_frag_callback( struct gm_port* port, void* context, gm_status_t status )
{
    mca_ptl_base_header_t* hdr = (mca_ptl_base_header_t*)context;
    mca_ptl_base_frag_t* base_frag = (mca_ptl_base_frag_t*)hdr->hdr_frag.hdr_src_ptr.pval;
    mca_ptl_gm_module_t* module = (mca_ptl_gm_module_t*)base_frag->frag_owner;

    if( GM_SUCCESS == status ) {
        /* recycle the DMA buffer and give back the send token */
        OMPI_GM_FREE_LIST_RETURN( &(module->gm_send_dma_frags), ((opal_list_item_t*)hdr) );
        opal_atomic_add( &(module->num_send_tokens), 1 );
    } else if( GM_SEND_TIMED_OUT == status ) {
        opal_output( 0, "send_continue timed out\n" );
    } else if( GM_SEND_DROPPED == status ) {
        opal_output( 0, "send_continue dropped\n" );
    } else {
        opal_output( 0, "send_continue other error %d\n", status );
    }
}
#define DO_DEBUG( INST )
#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET
static inline
int mca_ptl_gm_receiver_advance_pipeline( mca_ptl_gm_recv_frag_t* frag, int onlyifget );
/* This function gets called when the gm_get is finished (i.e. when the read from
 * remote memory has completed).  We then have to send back the ack.  If the original
 * data was too large for a single fragment it will have been split into several, and
 * we must send back one ack for each of those fragments.
 */
/* GM completion callback for a gm_get() issued by the receiver.  On success
 * a quick FIN/ack message is sent back to the sender, the segment that was
 * just read is flagged for deregistration, and the receiver pipeline is
 * advanced one step.  Failures are only logged. */
static void mca_ptl_gm_get_callback( struct gm_port *port, void * context, gm_status_t status )
{
    mca_ptl_gm_recv_frag_t* recv_frag = (mca_ptl_gm_recv_frag_t*)context;
    mca_ptl_gm_peer_t* remote = (mca_ptl_gm_peer_t*)recv_frag->frag_recv.frag_base.frag_peer;

    if( GM_SUCCESS == status ) {
        DO_DEBUG( opal_output( 0, "receiver %d %p get_callback processed %lld validated %lld",
                               orte_process_info.my_name->vpid, recv_frag,
                               recv_frag->frag_bytes_processed, recv_frag->frag_bytes_validated ); )
        /* acknowledge this piece to the sender */
        mca_ptl_gm_send_quick_fin_message( remote, &(recv_frag->frag_recv.frag_base) );
        remote->get_started = false;
        /* mark the memory as ready to be deregistered */
        recv_frag->pipeline.lines[recv_frag->pipeline.pos_deregister].flags |= PTL_GM_PIPELINE_DEREGISTER;
        mca_ptl_gm_receiver_advance_pipeline( recv_frag, 0 );
    } else if( GM_SEND_TIMED_OUT == status ) {
        opal_output( 0, "mca_ptl_gm_get_callback timed out\n" );
    } else if( GM_SEND_DROPPED == status ) {
        opal_output( 0, "mca_ptl_gm_get_callback dropped\n" );
    } else {
        opal_output( 0, "mca_ptl_gm_get_callback other error %d\n", status );
    }
}
/* Advance the receiver side of the RDMA-get pipeline by at most one step per
 * stage.  A pipeline line cycles through: register local memory, gm_get the
 * data from the sender's registered memory, then deregister.  The three
 * position indices (pos_transfert / pos_register / pos_deregister) walk the
 * GM_PIPELINE_DEPTH ring independently.  When onlyifget is 1 and no get is
 * ready to start, the register/deregister stages are skipped and only the
 * completion status is checked.  Returns OMPI_SUCCESS or OMPI_ERROR if a
 * memory registration fails. */
static inline
int mca_ptl_gm_receiver_advance_pipeline( mca_ptl_gm_recv_frag_t* frag, int onlyifget )
{
    mca_ptl_gm_peer_t* peer;
    gm_status_t status;
    mca_ptl_gm_pipeline_line_t *get_line, *reg_line, *dereg_line;
    uint64_t length;
    DO_DEBUG( int count = 0; char buffer[128]; )
    peer = (mca_ptl_gm_peer_t*)frag->frag_recv.frag_base.frag_peer;
    DO_DEBUG( count = sprintf( buffer, " %p", (void*)frag ); )
    /* Stage 1: if the current line has both its local memory registered and
     * the remote memory description (TRANSFERT == REGISTER|REMOTE), start
     * the gm_get; completion will invoke mca_ptl_gm_get_callback. */
    get_line = &(frag->pipeline.lines[frag->pipeline.pos_transfert]);
    if( (PTL_GM_PIPELINE_TRANSFERT & get_line->flags) == PTL_GM_PIPELINE_TRANSFERT ) {
        peer->get_started = true;
        gm_get( peer->peer_ptl->gm_port, get_line->remote_memory.lval,
                get_line->local_memory.pval, get_line->length,
                GM_LOW_PRIORITY, peer->peer_addr.local_id, peer->peer_addr.port_id,
                mca_ptl_gm_get_callback, frag );
        /* consume the REMOTE flag so this line is not started twice */
        get_line->flags ^= PTL_GM_PIPELINE_REMOTE;
        DO_DEBUG( count += sprintf( buffer + count, " start get %lld (%d)", get_line->length, frag->pipeline.pos_transfert ); );
        frag->pipeline.pos_transfert = (frag->pipeline.pos_transfert + 1) % GM_PIPELINE_DEPTH;
    } else if( 1 == onlyifget ) goto check_completion_status;
    /* Stage 2: register the next segment of the user buffer, continuing
     * right after the segment described by get_line. */
    reg_line = &(frag->pipeline.lines[frag->pipeline.pos_register]);
    length = frag->frag_recv.frag_base.frag_size - frag->frag_bytes_processed;
    if( (0 != length) && !(reg_line->flags & PTL_GM_PIPELINE_REGISTER) ) {
        reg_line->hdr_flags = get_line->hdr_flags;
        reg_line->offset = get_line->offset + get_line->length;
        reg_line->length = length;
        /* cap each pipeline line at the configured RDMA fragment size */
        if( reg_line->length > mca_ptl_gm_component.gm_rdma_frag_size )
            reg_line->length = mca_ptl_gm_component.gm_rdma_frag_size;
        reg_line->local_memory.lval = 0L;
        reg_line->local_memory.pval = (char*)frag->frag_recv.frag_base.frag_addr +
            reg_line->offset;
        status = mca_ptl_gm_register_memory( peer->peer_ptl->gm_port, reg_line->local_memory.pval,
                                             reg_line->length );
        if( GM_SUCCESS != status ) {
            opal_output( 0, "Cannot register receiver memory (%p, %ld) bytes offset %ld\n",
                         reg_line->local_memory.pval, reg_line->length, reg_line->offset );
            return OMPI_ERROR;
        }
        DO_DEBUG( count += sprintf( buffer + count, " start register %lld offset %lld processed %lld(%d)",
                                    reg_line->length, reg_line->offset, frag->frag_bytes_processed,
                                    frag->pipeline.pos_register ); );
        reg_line->flags |= PTL_GM_PIPELINE_REGISTER;
        frag->frag_bytes_processed += reg_line->length;
        frag->pipeline.pos_register = (frag->pipeline.pos_register + 1) % GM_PIPELINE_DEPTH;
    }
    /* Stage 3: deregister the line whose get has completed (flagged by
     * mca_ptl_gm_get_callback) and count its bytes as validated. */
    dereg_line = &(frag->pipeline.lines[frag->pipeline.pos_deregister]);
    if( dereg_line->flags & PTL_GM_PIPELINE_DEREGISTER ) { /* something usefull */
        status = mca_ptl_gm_deregister_memory( peer->peer_ptl->gm_port,
                                               dereg_line->local_memory.pval, dereg_line->length );
        if( GM_SUCCESS != status ) {
            opal_output( 0, "unpinning receiver memory from get (%p, %u) failed \n",
                         dereg_line->local_memory.pval,
                         dereg_line->length );
        }
        /* clear both flags; the line must end up idle (flags == 0) */
        dereg_line->flags ^= (PTL_GM_PIPELINE_DEREGISTER|PTL_GM_PIPELINE_REGISTER);
        assert( dereg_line->flags == 0 );
        frag->frag_bytes_validated += dereg_line->length;
        DO_DEBUG( count += sprintf( buffer + count, " start deregister %lld offset %lld (%d)", dereg_line->length,
                                    dereg_line->offset, frag->pipeline.pos_deregister ); )
        frag->pipeline.pos_deregister = (frag->pipeline.pos_deregister + 1) % GM_PIPELINE_DEPTH;
    }
 check_completion_status:
    /* Once every byte has been validated, complete the receive request and
     * return the fragment to its free list. */
    if( frag->frag_recv.frag_base.frag_size <= frag->frag_bytes_validated ) {
        peer->peer_ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)peer->peer_ptl,
                                                 frag->frag_recv.frag_request, frag->frag_recv.frag_base.frag_size,
                                                 frag->frag_recv.frag_base.frag_size );
        OMPI_FREE_LIST_RETURN( &(peer->peer_ptl->gm_recv_frags_free), (opal_list_item_t*)frag );
        DO_DEBUG( count += sprintf( buffer + count, " finish" ); )
    }
    DO_DEBUG( opal_output( 0, "receiver %d %s", orte_process_info.my_name->vpid, buffer ); )
    return OMPI_SUCCESS;
}
/* Advance the sender side of the RDMA-get pipeline by at most one step per
 * stage.  A line cycles through: register local memory, send a FRAG header
 * advertising that memory to the receiver (which will gm_get from it), then
 * deregister once the receiver's FIN/ack flags the line.  The three ring
 * indices (pos_transfert / pos_register / pos_deregister) advance
 * independently.  Returns OMPI_SUCCESS or OMPI_ERROR if a memory
 * registration fails. */
static inline
int mca_ptl_gm_sender_advance_pipeline( mca_ptl_gm_send_frag_t* frag )
{
    mca_ptl_gm_peer_t* peer;
    gm_status_t status;
    mca_ptl_gm_pipeline_line_t *send_line, *reg_line, *dereg_line;
    mca_ptl_gm_frag_header_t* hdr;
    DO_DEBUG( int count = 0; char buffer[256]; )
    peer = (mca_ptl_gm_peer_t*)frag->frag_send.frag_base.frag_peer;
    DO_DEBUG( count = sprintf( buffer, " %p", (void*)frag ); )
    /* Stage 1: if the current line is fully ready (TRANSFERT), grab a DMA
     * header, describe the registered memory in it and send it with high
     * priority so the receiver can start its gm_get. */
    send_line = &(frag->pipeline.lines[frag->pipeline.pos_transfert]);
    if( (send_line->flags & PTL_GM_PIPELINE_TRANSFERT) == PTL_GM_PIPELINE_TRANSFERT ) {
        opal_list_item_t* item;
        int32_t rc;
        OMPI_FREE_LIST_WAIT( &(peer->peer_ptl->gm_send_dma_frags), item, rc );
        opal_atomic_sub( &(peer->peer_ptl->num_send_tokens), 1 );
        hdr = (mca_ptl_gm_frag_header_t*)item;
        hdr->hdr_frag.hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
        hdr->hdr_frag.hdr_common.hdr_flags = send_line->hdr_flags |
            frag->frag_send.frag_base.frag_header.hdr_common.hdr_flags;
        hdr->hdr_frag.hdr_src_ptr.lval = 0L;  /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
        hdr->hdr_frag.hdr_src_ptr.pval = frag;
        hdr->hdr_frag.hdr_dst_ptr = frag->frag_send.frag_base.frag_header.hdr_ack.hdr_dst_match;
        hdr->hdr_frag.hdr_frag_offset = send_line->offset;
        hdr->hdr_frag.hdr_frag_length = send_line->length;
        hdr->registered_memory = send_line->local_memory;
        gm_send_with_callback( peer->peer_ptl->gm_port, hdr,
                               GM_SIZE, sizeof(mca_ptl_gm_frag_header_t),
                               GM_HIGH_PRIORITY, peer->peer_addr.local_id, peer->peer_addr.port_id,
                               mca_ptl_gm_basic_frag_callback, (void*)hdr );
        /* consume the REMOTE flag so this line is not advertised twice */
        send_line->flags ^= PTL_GM_PIPELINE_REMOTE;
        frag->pipeline.pos_transfert = (frag->pipeline.pos_transfert + 1) % GM_PIPELINE_DEPTH;
        DO_DEBUG( count += sprintf( buffer + count, " send new fragment %lld", send_line->length ); )
    }
    /* Stage 2: deregister the line acknowledged by the receiver and count
     * its bytes as validated. */
    dereg_line = &(frag->pipeline.lines[frag->pipeline.pos_deregister]);
    if( dereg_line->flags & PTL_GM_PIPELINE_DEREGISTER ) { /* something usefull */
        status = mca_ptl_gm_deregister_memory( peer->peer_ptl->gm_port,
                                               dereg_line->local_memory.pval, dereg_line->length );
        if( GM_SUCCESS != status ) {
            /* fixed: this is the sender's memory (message previously said "receiver") */
            opal_output( 0, "unpinning sender memory from get (%p, %u) failed \n",
                         dereg_line->local_memory.pval, dereg_line->length );
        }
        dereg_line->flags ^= (PTL_GM_PIPELINE_REGISTER | PTL_GM_PIPELINE_DEREGISTER);
        assert( dereg_line->flags == 0 );
        frag->frag_bytes_validated += dereg_line->length;
        frag->pipeline.pos_deregister = (frag->pipeline.pos_deregister + 1) % GM_PIPELINE_DEPTH;
        DO_DEBUG( count += sprintf( buffer + count, " start deregister %lld offset %lld (validated %lld)",
                                    dereg_line->length, dereg_line->offset, frag->frag_bytes_validated ); )
    }
    /* Stage 3: register the next segment of the user buffer, continuing
     * right after the segment described by send_line. */
    reg_line = &(frag->pipeline.lines[frag->pipeline.pos_register]);
    if( !(reg_line->flags & PTL_GM_PIPELINE_REGISTER) ) {
        reg_line->length = frag->frag_send.frag_base.frag_size - frag->frag_bytes_processed;
        if( 0 != reg_line->length ) {
            reg_line->hdr_flags = frag->frag_send.frag_base.frag_header.hdr_common.hdr_flags;
            if( reg_line->length > mca_ptl_gm_component.gm_rdma_frag_size ) {
                reg_line->length = mca_ptl_gm_component.gm_rdma_frag_size;
            } else {
                /* last piece of the message */
                reg_line->hdr_flags |= PTL_FLAG_GM_LAST_FRAGMENT;
            }
            reg_line->offset = send_line->offset + send_line->length;
            reg_line->local_memory.lval = 0L;
            reg_line->local_memory.pval = (char*)frag->frag_send.frag_base.frag_addr +
                reg_line->offset;
            status = mca_ptl_gm_register_memory( peer->peer_ptl->gm_port, reg_line->local_memory.pval,
                                                 reg_line->length );
            if( GM_SUCCESS != status ) {
                opal_output( 0, "Cannot register sender memory (%p, %ld) bytes offset %ld\n",
                             reg_line->local_memory.pval, reg_line->length, reg_line->offset );
                return OMPI_ERROR;
            }
            reg_line->flags |= PTL_GM_PIPELINE_TRANSFERT;
            frag->frag_bytes_processed += reg_line->length;
            frag->pipeline.pos_register = (frag->pipeline.pos_register + 1) % GM_PIPELINE_DEPTH;
            DO_DEBUG( count += sprintf( buffer + count, " start register %lld offset %lld",
                                        reg_line->length, reg_line->offset ); )
        }
    }
    DO_DEBUG( opal_output( 0, "sender %d %s", orte_process_info.my_name->vpid, buffer ); )
    return OMPI_SUCCESS;
}
#endif /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */
/* Send the internal rendez-vous FRAG header for a large message.  As much
 * user data as fits in the DMA segment behind the header is packed inline;
 * the header advertises the full remaining length with a NULL registered
 * memory, which tells the receiver to start registering its own buffer.
 * Returns OMPI_SUCCESS, or OMPI_ERROR if the convertor pack fails. */
static inline
int mca_ptl_gm_send_internal_rndv_header( mca_ptl_gm_peer_t *ptl_peer,
                                          mca_ptl_gm_send_frag_t *fragment,
                                          mca_ptl_gm_frag_header_t* hdr,
                                          int flags )
{
    ompi_convertor_t *convertor = &(fragment->frag_send.frag_base.frag_convertor);
    struct iovec vec;
    uint32_t vec_count = 1;
    size_t packed_length;
    int32_t free_after;

    /* pack directly into the DMA buffer, right behind the header */
    vec.iov_base = (char*)hdr + sizeof(mca_ptl_gm_frag_header_t);
    vec.iov_len = fragment->frag_send.frag_base.frag_size - fragment->frag_bytes_processed;
    if( vec.iov_len > (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_gm_frag_header_t)) )
        vec.iov_len = (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_gm_frag_header_t));
    packed_length = vec.iov_len;
    if( ompi_convertor_pack(convertor, &(vec), &vec_count, &packed_length, &free_after) < 0)
        return OMPI_ERROR;

    hdr->hdr_frag.hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
    hdr->hdr_frag.hdr_common.hdr_flags = flags;
    hdr->hdr_frag.hdr_src_ptr.lval = 0L;  /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
    hdr->hdr_frag.hdr_src_ptr.pval = fragment;
    hdr->hdr_frag.hdr_dst_ptr = fragment->frag_send.frag_request->req_peer_match;
    hdr->hdr_frag.hdr_frag_offset = fragment->frag_offset + fragment->frag_bytes_processed;
    /* advertise everything still outstanding, not only the inline part */
    hdr->hdr_frag.hdr_frag_length = fragment->frag_send.frag_base.frag_size -
        fragment->frag_bytes_processed;
    /* NULL registered memory == first round of the local rendez-vous */
    hdr->registered_memory.lval = 0L;
    hdr->registered_memory.pval = NULL;

    DO_DEBUG( opal_output( 0, "sender %d before send internal rndv header hdr_offset %lld hdr_length %lld max_data %u",
                           orte_process_info.my_name->vpid, hdr->hdr_frag.hdr_frag_offset, hdr->hdr_frag.hdr_frag_length, packed_length ); );
    gm_send_with_callback( ptl_peer->peer_ptl->gm_port, hdr, GM_SIZE,
                           sizeof(mca_ptl_gm_frag_header_t) + packed_length,
                           GM_LOW_PRIORITY, ptl_peer->peer_addr.local_id, ptl_peer->peer_addr.port_id,
                           mca_ptl_gm_basic_frag_callback, (void *)hdr );

    fragment->frag_bytes_processed += packed_length;
    fragment->frag_bytes_validated += packed_length;
    DO_DEBUG( opal_output( 0, "sender %d after send internal rndv header processed %lld, validated %lld max_data %u",
                           orte_process_info.my_name->vpid, fragment->frag_bytes_processed, fragment->frag_bytes_validated, packed_length ); );
    return OMPI_SUCCESS;
}
/* Send burst_length bytes of eager data as a sequence of FRAG messages,
 * each carrying as much data as fits in one DMA segment behind the header.
 * The caller may pass a pre-allocated DMA header for the first message
 * (hdr != NULL); subsequent iterations grab fresh ones from the DMA free
 * list.  The fragment's processed/validated counters are advanced as data
 * is packed.  Returns OMPI_SUCCESS, or OMPI_ERROR if a pack fails. */
static inline
int mca_ptl_gm_send_burst_data( mca_ptl_gm_peer_t *ptl_peer,
                                mca_ptl_gm_send_frag_t *fragment,
                                uint32_t burst_length,
                                mca_ptl_base_frag_header_t* hdr,
                                int32_t flags )
{
    int32_t freeAfter, rc;
    uint32_t in_size;
    size_t max_data;
    struct iovec iov;
    ompi_convertor_t *convertor = &(fragment->frag_send.frag_base.frag_convertor);
    while( 0 < burst_length ) { /* send everything for the burst_length size */
        if( NULL == hdr ) {
            /* no header from the caller (or previous iteration consumed it):
             * take a new DMA buffer and a send token */
            opal_list_item_t* item;
            OMPI_FREE_LIST_WAIT( &(ptl_peer->peer_ptl->gm_send_dma_frags), item, rc );
            opal_atomic_sub( &(ptl_peer->peer_ptl->num_send_tokens), 1 );
            hdr = (mca_ptl_base_frag_header_t*)item;
        }
        /* pack directly behind the header, capped by segment and burst size */
        iov.iov_base = (char*)hdr + sizeof(mca_ptl_base_frag_header_t);
        iov.iov_len = mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_frag_header_t);
        if( iov.iov_len >= burst_length )
            iov.iov_len = burst_length;
        max_data = iov.iov_len;
        in_size = 1;
        if( ompi_convertor_pack(convertor, &(iov), &in_size, &max_data, &freeAfter) < 0)
            return OMPI_ERROR;
        hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FRAG;
        hdr->hdr_common.hdr_flags = flags;
        hdr->hdr_src_ptr.lval = 0L;  /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
        hdr->hdr_src_ptr.pval = fragment;
        hdr->hdr_dst_ptr = fragment->frag_send.frag_request->req_peer_match;
        assert( hdr->hdr_dst_ptr.pval != NULL );
        hdr->hdr_frag_offset = fragment->frag_offset + fragment->frag_bytes_processed;
        hdr->hdr_frag_length = max_data;
        fragment->frag_bytes_processed += max_data;
        fragment->frag_bytes_validated += max_data;
        burst_length -= max_data;
        if( fragment->frag_send.frag_base.frag_size == fragment->frag_bytes_processed ) {
            /* the burst covers the whole message: tag the final piece */
            assert( 0 == burst_length );
            hdr->hdr_common.hdr_flags |= PTL_FLAG_GM_LAST_FRAGMENT;
        }
        /* for the last piece set the header type to FIN */
        gm_send_with_callback( ptl_peer->peer_ptl->gm_port, hdr, GM_SIZE,
                               iov.iov_len + sizeof(mca_ptl_base_frag_header_t),
                               GM_LOW_PRIORITY, ptl_peer->peer_addr.local_id, ptl_peer->peer_addr.port_id,
                               mca_ptl_gm_basic_frag_callback, (void*)hdr );
        hdr = NULL; /* force to retrieve a new one on the next loop */
    }
    DO_DEBUG( opal_output( 0, "sender %d after burst offset %lld, processed %lld, validated %lld\n",
                           orte_process_info.my_name->vpid, fragment->frag_offset, fragment->frag_bytes_processed, fragment->frag_bytes_validated); );
    return OMPI_SUCCESS;
}
/* Continue sending a request after the rendez-vous handshake.  Strategy:
 * - small remainders (< eager limit) go entirely as an eager burst;
 * - larger messages send the non-aligned remainder as a burst, then either
 *   a simple rendez-vous FRAG header (no RDMA) or, when gm_get is
 *   available and the message is big enough, the registered-memory
 *   pipeline (PTL_FLAG_GM_REQUIRE_LOCK).
 * On full completion without ACK the request is progressed and the send
 * fragment returned to its free list.  Returns OMPI_SUCCESS/OMPI_ERROR. */
int mca_ptl_gm_peer_send_continue( mca_ptl_gm_peer_t *ptl_peer,
                                   mca_ptl_gm_send_frag_t *fragment,
                                   struct mca_ptl_base_send_request_t *sendreq,
                                   size_t offset,
                                   size_t *size,
                                   int flags )
{
    mca_ptl_gm_frag_header_t* hdr;
    uint64_t remaining_bytes, burst_length;
    opal_list_item_t *item;
    int rc = 0;
#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET
    gm_status_t status;
    mca_ptl_gm_pipeline_line_t* pipeline;
#endif  /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */
    fragment->frag_offset = offset;
    /* must update the offset after actual fragment size is determined
     * before attempting to send the fragment
     */
    mca_ptl_base_send_request_offset( fragment->frag_send.frag_request,
                                      fragment->frag_send.frag_base.frag_size );
    DO_DEBUG( opal_output( 0, "sender %d start new send length %ld offset %ld\n", orte_process_info.my_name->vpid, *size, offset ); )
    /* The first DMA memory buffer has been alocated in same time as the fragment */
    item = (opal_list_item_t*)fragment->send_buf;
    hdr = (mca_ptl_gm_frag_header_t*)item;
    remaining_bytes = fragment->frag_send.frag_base.frag_size - fragment->frag_bytes_processed;
    if( remaining_bytes < mca_ptl_gm_component.gm_eager_limit ) {
        /* small enough: everything goes eagerly */
        burst_length = remaining_bytes;
    } else {
#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET
        /* burst only the remainder that would not fill a whole segment
         * (resp. RDMA fragment), so the rest splits into full-size pieces */
        if( remaining_bytes < mca_ptl_gm_component.gm_rndv_burst_limit ) {
            burst_length = remaining_bytes % (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_frag_header_t));
        } else {
            if( mca_ptl_gm_component.gm_rdma_frag_size == UINT_MAX )
                burst_length = 0;
            else
                burst_length = remaining_bytes % mca_ptl_gm_component.gm_rdma_frag_size;
        }
#else
        /*burst_length = remaining_bytes % (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_frag_header_t));*/
        burst_length = (mca_ptl_gm_component.gm_segment_size - sizeof(mca_ptl_base_frag_header_t));
#endif  /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */
    }
    if( burst_length > 0 ) {
        mca_ptl_gm_send_burst_data( ptl_peer, fragment, burst_length, &(hdr->hdr_frag), flags );
        item = NULL;  /* this buffer was already used by the mca_ptl_gm_send_burst_data function */
        DO_DEBUG( opal_output( 0, "sender %d burst %ld bytes", orte_process_info.my_name->vpid, burst_length ); );
    }
    if( fragment->frag_send.frag_base.frag_size == fragment->frag_bytes_processed ) {
        /* the burst completed the whole message */
        *size = fragment->frag_bytes_processed;
        if( !(flags & MCA_PTL_FLAGS_ACK) ) {
            ptl_peer->peer_ptl->super.ptl_send_progress( (mca_ptl_base_module_t*)ptl_peer->peer_ptl,
                                                         fragment->frag_send.frag_request,
                                                         (*size) );
            OMPI_FREE_LIST_RETURN( &(ptl_peer->peer_ptl->gm_send_frags), ((opal_list_item_t*)fragment) );
        }
        return OMPI_SUCCESS;
    }
    if( NULL == item ) {
        /* the pre-allocated DMA buffer was consumed by the burst: get a new one */
        OMPI_FREE_LIST_WAIT( &(ptl_peer->peer_ptl->gm_send_dma_frags), item, rc );
        opal_atomic_sub( &(ptl_peer->peer_ptl->num_send_tokens), 1 );
        hdr = (mca_ptl_gm_frag_header_t*)item;
    }
    /* Large set of data => we have to setup a rendez-vous protocol. Here we can
     * use the match header already filled in by the upper level and just complete it
     * with the others informations. When we reach this point the rendez-vous protocol
     * has already been realized so we know that the receiver expect our message.
     */
#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET
    /* Trigger the long rendez-vous protocol only if gm_get is supported */
    if( remaining_bytes > mca_ptl_gm_component.gm_rndv_burst_limit )
        flags |= PTL_FLAG_GM_REQUIRE_LOCK;
#endif  /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */
    mca_ptl_gm_send_internal_rndv_header( ptl_peer, fragment, hdr, flags );
    if( !(PTL_FLAG_GM_REQUIRE_LOCK & flags) )
        return OMPI_SUCCESS;
#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET
    /* register the first pipeline line so it is ready when the ack arrives */
    pipeline = &(fragment->pipeline.lines[0]);
    pipeline->length = fragment->frag_send.frag_base.frag_size - fragment->frag_bytes_processed;
    if( pipeline->length > mca_ptl_gm_component.gm_rdma_frag_size ) {
        pipeline->length = mca_ptl_gm_component.gm_rdma_frag_size;
    }
    pipeline->offset = fragment->frag_offset + fragment->frag_bytes_processed;
    pipeline->hdr_flags = fragment->frag_send.frag_base.frag_header.hdr_common.hdr_flags;
    pipeline->local_memory.lval = 0L;
    pipeline->local_memory.pval = (char*)fragment->frag_send.frag_base.frag_addr + pipeline->offset;
    /* NOTE(review): a registration failure here is only logged, not
     * propagated — confirm that is intentional */
    status = mca_ptl_gm_register_memory( ptl_peer->peer_ptl->gm_port, pipeline->local_memory.pval,
                                         pipeline->length );
    if( GM_SUCCESS != status ) {
        opal_output( 0, "Cannot register sender memory (%p, %ld) bytes offset %ld\n",
                     pipeline->local_memory.pval, pipeline->length, pipeline->offset );
    }
    pipeline->flags = PTL_GM_PIPELINE_TRANSFERT;
    fragment->frag_bytes_processed += pipeline->length;
    DO_DEBUG( opal_output( 0, "sender %d %p start register %lld (%d)", orte_process_info.my_name->vpid,
                           fragment, pipeline->length, fragment->pipeline.pos_register ); )
    fragment->pipeline.pos_register = (fragment->pipeline.pos_register + 1) % GM_PIPELINE_DEPTH;
    /* Now we are waiting for the ack message. Meanwhile we can register the sender first piece
     * of data. In this way we have a recovery between the expensive registration on both sides.
     */
#else
    assert( 0 );
#endif  /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */
    return OMPI_SUCCESS;
}
/* GM completion callback for the first (match/rendez-vous) message.  The
 * sender stashed the module pointer in hdr_rndv.hdr_frag_length (see
 * mca_ptl_gm_peer_send), so it can be recovered here to return the DMA
 * buffer and release the send token. */
static void send_match_callback( struct gm_port* port, void* context, gm_status_t status )
{
    mca_ptl_base_header_t* hdr = (mca_ptl_base_header_t*)context;
    mca_ptl_gm_module_t* module = (mca_ptl_gm_module_t*)((long)hdr->hdr_rndv.hdr_frag_length);

    OMPI_GM_FREE_LIST_RETURN( &(module->gm_send_dma_frags), ((opal_list_item_t*)hdr) );
    /* release the send token */
    opal_atomic_add( &(module->num_send_tokens), 1 );
}
/* This function is used for the initial send. For small messages the data is attached
 * to the header, while for large messages we set up a rendez-vous protocol. We don't
 * need to fill in a fragment descriptor here, as all we need is the request pointer.
 * Moreover, even if we filled in a fragment it would be lost as soon as we got the
 * answer from the remote node, and we would be unable to reuse any information stored
 * inside it (such as the convertor).
 */
/* Initial send of a request: build a rendez-vous header in a DMA buffer,
 * pack up to one segment of user data behind it, and send it with a low
 * priority.  If no ACK is required the request is progressed immediately
 * with the amount of data actually packed.  Returns OMPI_SUCCESS or
 * OMPI_ERROR if the convertor pack fails. */
int mca_ptl_gm_peer_send( struct mca_ptl_base_module_t* ptl,
                          struct mca_ptl_base_peer_t* ptl_base_peer,
                          struct mca_ptl_base_send_request_t *sendreq,
                          size_t offset,
                          size_t size,
                          int flags )
{
    const int header_length = sizeof(mca_ptl_base_rendezvous_header_t);
    mca_ptl_base_header_t* hdr;
    mca_ptl_gm_module_t* ptl_gm = (mca_ptl_gm_module_t*)ptl;
    ompi_convertor_t *convertor = NULL;
    int rc, freeAfter;
    size_t max_data = 0;
    mca_ptl_gm_peer_t* ptl_peer = (mca_ptl_gm_peer_t*)ptl_base_peer;
    opal_list_item_t *item;
    char* sendbuf;
    /* take a DMA buffer and a send token */
    OMPI_FREE_LIST_WAIT( &(ptl_gm->gm_send_dma_frags), item, rc );
    opal_atomic_sub( &(ptl_gm->num_send_tokens), 1 );
    sendbuf = (char*)item;
    hdr = (mca_ptl_base_header_t*)item;
    /* Populate the header with the match informations */
    (void)mca_ptl_gm_init_header_rndv( hdr, sendreq, flags );
    /* smuggle the module pointer through hdr_frag_length so
     * send_match_callback can recover it */
    hdr->hdr_rndv.hdr_frag_length = (uint64_t)((long)ptl);
    if( size > 0 ) {
        struct iovec iov;
        uint32_t iov_count = 1;
        convertor = &sendreq->req_send.req_convertor;
        /* We send here the first fragment, and the convertor does not need any
         * particular options. Thus, we can use the one already prepared on the
         * request.
         */
        if( (size + header_length) <= mca_ptl_gm_component.gm_segment_size )
            iov.iov_len = size;
        else
            iov.iov_len = mca_ptl_gm_component.gm_segment_size - header_length;
        /* copy the data to the registered buffer */
        iov.iov_base = ((char*)hdr) + header_length;
        max_data = iov.iov_len;
        if((rc = ompi_convertor_pack(convertor, &(iov), &iov_count, &max_data, &freeAfter)) < 0)
            return OMPI_ERROR;
        assert( max_data != 0 );
        /* must update the offset after actual fragment size is determined
         * before attempting to send the fragment
         */
        mca_ptl_base_send_request_offset( sendreq, max_data );
    }
    /* Send the first fragment */
    gm_send_with_callback( ptl_gm->gm_port, hdr,
                           GM_SIZE, max_data + header_length, GM_LOW_PRIORITY,
                           ptl_peer->peer_addr.local_id, ptl_peer->peer_addr.port_id,
                           send_match_callback, (void *)hdr );
    if( !(flags & MCA_PTL_FLAGS_ACK) ) {
        /* no ack expected: the whole transfer is this first fragment */
        ptl->ptl_send_progress( ptl, sendreq, max_data );
        DO_DEBUG( opal_output( 0, "sender %d complete request %p w/o rndv with %d bytes",
                               orte_process_info.my_name->vpid, sendreq, max_data ); );
    } else {
        DO_DEBUG( opal_output( 0, "sender %d sent request %p for rndv with %d bytes",
                               orte_process_info.my_name->vpid, sendreq, max_data ); );
    }
    return OMPI_SUCCESS;
}
/* Handle an ACK (control) header: copy the receiver's matching information
 * back into the originating send request and progress it by the amount
 * already delivered.  Always returns NULL (no receive fragment created). */
static mca_ptl_gm_recv_frag_t*
mca_ptl_gm_recv_frag_ctrl( struct mca_ptl_gm_module_t *ptl,
                           mca_ptl_base_header_t * header, uint32_t msg_len )
{
    mca_ptl_base_send_request_t *send_req;

    assert( MCA_PTL_FLAGS_ACK & header->hdr_common.hdr_flags );
    send_req = (mca_ptl_base_send_request_t*)(header->hdr_ack.hdr_src_ptr.pval);
    /* remember where the receiver matched us so later fragments can target it */
    send_req->req_peer_match = header->hdr_ack.hdr_dst_match;
    send_req->req_peer_addr  = header->hdr_ack.hdr_dst_addr;
    send_req->req_peer_size  = header->hdr_ack.hdr_dst_size;
    DO_DEBUG( opal_output( 0, "sender %d get back the rendez-vous for request %p",
                           orte_process_info.my_name->vpid, send_req ); );
    ptl->super.ptl_send_progress( (mca_ptl_base_module_t*)ptl, send_req, send_req->req_offset );
    return NULL;
}
/* We get a RNDV header in two situations:
 * - when the remote node needs an ack
 * - when we set up a rendez-vous protocol with the remote node.
 * In both cases we have to send an ack back.
 */
/* Handle a MATCH or RNDV header: allocate a receive fragment, try to match
 * it against a posted receive, and if the match fails copy any inline data
 * out of the GM receive buffer so it can be released.  Returns NULL when
 * the fragment was matched (and consumed), otherwise the unmatched
 * fragment for later matching. */
static mca_ptl_gm_recv_frag_t*
mca_ptl_gm_recv_frag_match( struct mca_ptl_gm_module_t *ptl,
                            mca_ptl_base_header_t* hdr, uint32_t msg_len )
{
    mca_ptl_gm_recv_frag_t* recv_frag;
    bool matched;
    /* allocate a receive fragment */
    recv_frag = mca_ptl_gm_alloc_recv_frag( (struct mca_ptl_base_module_t*)ptl );
    if( MCA_PTL_HDR_TYPE_MATCH == hdr->hdr_rndv.hdr_match.hdr_common.hdr_type ) {
        /* inline data starts right after the (smaller) match header */
        recv_frag->frag_recv.frag_base.frag_addr =
            (char*)hdr + sizeof(mca_ptl_base_match_header_t);
        recv_frag->frag_recv.frag_base.frag_size = hdr->hdr_match.hdr_msg_length;
    } else {
        assert( MCA_PTL_HDR_TYPE_RNDV == hdr->hdr_rndv.hdr_match.hdr_common.hdr_type );
        recv_frag->frag_recv.frag_base.frag_addr =
            (char*)hdr + sizeof(mca_ptl_base_rendezvous_header_t);
        recv_frag->frag_recv.frag_base.frag_size = hdr->hdr_rndv.hdr_match.hdr_msg_length;
    }
    recv_frag->frag_recv.frag_is_buffered = false;
    recv_frag->have_allocated_buffer = false;
    /* NOTE(review): the rendezvous header size is used here even for
     * MATCH-type headers, whose data offset above uses the match header
     * size — confirm both senders actually use the rendezvous layout */
    recv_frag->attached_data_length = msg_len - sizeof(mca_ptl_base_rendezvous_header_t);
    recv_frag->frag_recv.frag_base.frag_peer = NULL;
    recv_frag->frag_recv.frag_base.frag_header.hdr_rndv = hdr->hdr_rndv;
    matched = ptl->super.ptl_match( &(ptl->super),
                                    &(recv_frag->frag_recv),
                                    &(recv_frag->frag_recv.frag_base.frag_header.hdr_match) );
    if( true == matched ) return NULL;  /* done and fragment already removed */
    /* get some memory and copy the data inside. We can then release the receive buffer */
    if( 0 != recv_frag->attached_data_length ) {
        char* ptr = (char*)gm_get_local_buffer();
        recv_frag->have_allocated_buffer = true;
        memcpy( ptr, recv_frag->frag_recv.frag_base.frag_addr, recv_frag->attached_data_length );
        recv_frag->frag_recv.frag_base.frag_addr = ptr;
    } else {
        recv_frag->frag_recv.frag_base.frag_addr = NULL;
    }
    recv_frag->matched = false;
    return recv_frag;
}
/* GM completion callback for the quick FIN/ack message.  Recovers the
 * module through the fragment stashed in hdr_dst_match, then returns the
 * DMA buffer holding the ack header and releases its send token. */
static void recv_short_callback( struct gm_port* port, void* context, gm_status_t status )
{
    mca_ptl_base_ack_header_t* ack_hdr = (mca_ptl_base_ack_header_t*)context;
    mca_ptl_base_frag_t* base_frag = (mca_ptl_base_frag_t*)ack_hdr->hdr_dst_match.pval;
    mca_ptl_gm_module_t* module = (mca_ptl_gm_module_t*)base_frag->frag_owner;

    OMPI_GM_FREE_LIST_RETURN( &(module->gm_send_dma_frags), ((opal_list_item_t*)ack_hdr) );
    /* release the send token */
    opal_atomic_add( &(module->num_send_tokens), 1 );
}
static int mca_ptl_gm_send_quick_fin_message( struct mca_ptl_gm_peer_t* ptl_peer,
struct mca_ptl_base_frag_t* frag )
{
opal_list_item_t *item;
mca_ptl_base_header_t *hdr;
int rc;
OMPI_FREE_LIST_WAIT( &(ptl_peer->peer_ptl->gm_send_dma_frags), item, rc );
opal_atomic_sub( &(ptl_peer->peer_ptl->num_send_tokens), 1 );
hdr = (mca_ptl_base_header_t*)item;
hdr->hdr_common.hdr_type = MCA_PTL_HDR_TYPE_FIN;
hdr->hdr_common.hdr_flags = PTL_FLAG_GM_HAS_FRAGMENT | frag->frag_header.hdr_common.hdr_flags;
hdr->hdr_ack.hdr_src_ptr.pval = frag->frag_header.hdr_frag.hdr_src_ptr.pval;
hdr->hdr_ack.hdr_dst_match.lval = 0;
hdr->hdr_ack.hdr_dst_match.pval = frag;
hdr->hdr_ack.hdr_dst_addr.lval = 0; /*we are filling both p and val of dest address */
hdr->hdr_ack.hdr_dst_addr.pval = NULL;
hdr->hdr_ack.hdr_dst_size = frag->frag_header.hdr_frag.hdr_frag_length;
gm_send_with_callback(ptl_peer->peer_ptl->gm_port, hdr,
GM_SIZE, sizeof(mca_ptl_base_ack_header_t),
GM_HIGH_PRIORITY, ptl_peer->peer_addr.local_id, ptl_peer->peer_addr.port_id,
recv_short_callback, (void*)hdr );
DO_DEBUG( opal_output( 0, "receiver %d %p send quick message for length %lld", orte_process_info.my_name->vpid,
frag, frag->frag_header.hdr_frag.hdr_frag_length ); )
return OMPI_SUCCESS;
}
/* Handle an incoming FRAG header.  Three cases:
 * 1. HAS_FRAGMENT flag set: a receive fragment was already created for
 *    this exchange; its pointer travels back in hdr_dst_ptr.
 * 2. No fragment yet and the data fits in one segment: unpack straight
 *    into the request through a temporary convertor.
 * 3. No fragment yet and the message is large: create a fragment, ack
 *    the sender immediately, and set up a convertor at the right offset.
 * Inline data (anything past the header) is unpacked, and when the
 * REQUIRE_LOCK flag is set the RDMA-get pipeline is started/advanced.
 * Always returns NULL. */
static mca_ptl_gm_recv_frag_t*
mca_ptl_gm_recv_frag_frag( struct mca_ptl_gm_module_t* ptl,
                           mca_ptl_gm_frag_header_t* hdr, uint32_t msg_len )
{
    mca_ptl_base_recv_request_t *request;
    ompi_convertor_t local_convertor, *convertor;
    struct iovec iov;
    uint32_t iov_count, header_length;
    size_t max_data = 0;
    int32_t freeAfter, rc;
    mca_ptl_gm_recv_frag_t* frag;
    header_length = sizeof(mca_ptl_base_frag_header_t);
    if( hdr->hdr_frag.hdr_common.hdr_flags & PTL_FLAG_GM_HAS_FRAGMENT ) {
        /* case 1: the receive fragment already exists */
        frag = (mca_ptl_gm_recv_frag_t*)hdr->hdr_frag.hdr_dst_ptr.pval;
        frag->frag_recv.frag_base.frag_header.hdr_frag = hdr->hdr_frag;
        request = (mca_ptl_base_recv_request_t*)frag->frag_recv.frag_request;
        /* here we can have a synchronisation problem if several threads work in same time
         * with the same request. The only question is if it's possible ?
         */
        convertor = &(frag->frag_recv.frag_base.frag_convertor);
        DO_DEBUG( opal_output( 0, "receiver %d get message tagged as HAS_FRAGMENT", orte_process_info.my_name->vpid ); );
        if( PTL_FLAG_GM_REQUIRE_LOCK & hdr->hdr_frag.hdr_common.hdr_flags )
            header_length = sizeof(mca_ptl_gm_frag_header_t);
    } else {
        request = (mca_ptl_base_recv_request_t*)hdr->hdr_frag.hdr_dst_ptr.pval;
        if( hdr->hdr_frag.hdr_frag_length <= (mca_ptl_gm_component.gm_segment_size -
                                              sizeof(mca_ptl_base_frag_header_t)) ) {
            /* case 2: short burst, no fragment needed */
            convertor = &local_convertor;
            request->req_recv.req_base.req_proc =
                ompi_comm_peer_lookup( request->req_recv.req_base.req_comm,
                                       request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE );
            frag = NULL;
        } else { /* large message => we have to create a receive fragment */
            frag = mca_ptl_gm_alloc_recv_frag( (struct mca_ptl_base_module_t*)ptl );
            frag->frag_recv.frag_request = request;
            frag->frag_offset = hdr->hdr_frag.hdr_frag_offset;
            frag->matched = true;
            frag->frag_recv.frag_base.frag_addr = frag->frag_recv.frag_request->req_recv.req_base.req_addr;
            frag->frag_recv.frag_base.frag_size = hdr->hdr_frag.hdr_frag_length;
            frag->frag_recv.frag_base.frag_peer = (struct mca_ptl_base_peer_t*)
                mca_pml_teg_proc_lookup_remote_peer( request->req_recv.req_base.req_comm,
                                                     request->req_recv.req_base.req_ompi.req_status.MPI_SOURCE,
                                                     (struct mca_ptl_base_module_t*)ptl );
            /* send an ack message to the sender ... quick hack (TODO) */
            /* temporarily zero the length so the ack reports 0 bytes delivered */
            frag->frag_recv.frag_base.frag_header.hdr_frag = hdr->hdr_frag;
            frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_length = 0;
            mca_ptl_gm_send_quick_fin_message( (mca_ptl_gm_peer_t*)frag->frag_recv.frag_base.frag_peer,
                                               &(frag->frag_recv.frag_base) );
            header_length = sizeof(mca_ptl_gm_frag_header_t);
            frag->frag_recv.frag_base.frag_header.hdr_frag.hdr_frag_length = hdr->hdr_frag.hdr_frag_length;
            convertor = &(frag->frag_recv.frag_base.frag_convertor);
            DO_DEBUG( opal_output( 0, "receiver %d create fragment with offset %lld and length %lld",
                                   orte_process_info.my_name->vpid, frag->frag_offset, frag->frag_recv.frag_base.frag_size ); );
        }
        /* GM does not use any of the convertor specializations, so we can just clone the
         * standard convertor attached to the request and set the position.
         */
        ompi_convertor_clone_with_position( &(request->req_recv.req_convertor),
                                            convertor, 1,
                                            &(hdr->hdr_frag.hdr_frag_offset) );
    }
    /* unpack any data carried inline after the header */
    if( header_length != msg_len ) {
        iov.iov_base = (char*)hdr + header_length;
        iov.iov_len = msg_len - header_length;
        iov_count = 1;
        max_data = iov.iov_len;
        freeAfter = 0;  /* unused here */
        rc = ompi_convertor_unpack( convertor, &iov, &iov_count, &max_data, &freeAfter );
        assert( 0 == freeAfter );
        /* If we are in a short burst mode then update the request */
        if( NULL == frag ) {
            ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)ptl, request, max_data, max_data );
            return NULL;
        }
    }
    /* Update the status of the fragment depending on the amount of data converted so far */
    frag->frag_bytes_processed += max_data;
    frag->frag_bytes_validated += max_data;
    if( !(PTL_FLAG_GM_REQUIRE_LOCK & hdr->hdr_frag.hdr_common.hdr_flags) ) {
        /* pure eager path: complete when everything has arrived */
        if( frag->frag_bytes_validated == frag->frag_recv.frag_base.frag_size ) {
            ptl->super.ptl_recv_progress( (mca_ptl_base_module_t*)ptl, request,
                                          frag->frag_recv.frag_base.frag_size,
                                          frag->frag_recv.frag_base.frag_size );
            OMPI_FREE_LIST_RETURN( &(((mca_ptl_gm_peer_t*)frag->frag_recv.frag_base.frag_peer)->peer_ptl->gm_recv_frags_free), (opal_list_item_t*)frag );
        }
        DO_DEBUG( opal_output( 0, "receiver %d waiting for burst with fragment ...", orte_process_info.my_name->vpid ); );
        return NULL;
    }
#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET
    {
        mca_ptl_gm_pipeline_line_t* pipeline;
        /* There is a kind of rendez-vous protocol used internally by the GM driver. If the amount of data
         * to transfert is large enough, then the sender will start sending a frag message with the
         * remote_memory set to NULL (but with the length set to the length of the first fragment).
         * It will allow the receiver to start to register it's own memory. Later when the receiver
         * get a fragment with the remote_memory field not NULL it can start getting the data.
         */
        if( NULL == hdr->registered_memory.pval ) { /* first round of the local rendez-vous protocol */
            pipeline = &(frag->pipeline.lines[0]);
            pipeline->hdr_flags = hdr->hdr_frag.hdr_common.hdr_flags;
            pipeline->offset = frag->frag_offset + frag->frag_bytes_processed;
            pipeline->length = 0;  /* we can lie about this one */
            mca_ptl_gm_receiver_advance_pipeline( frag, 0 );
        } else {
            /* the sender advertised registered memory: record it and let the
             * pipeline start the gm_get */
            pipeline = &(frag->pipeline.lines[frag->pipeline.pos_remote]);
            DO_DEBUG( opal_output( 0, "receiver %d %p get remote memory length %lld (%d)\n",
                                   orte_process_info.my_name->vpid, frag, hdr->hdr_frag.hdr_frag_length, frag->pipeline.pos_remote ); );
            frag->pipeline.pos_remote = (frag->pipeline.pos_remote + 1) % GM_PIPELINE_DEPTH;
            assert( (pipeline->flags & PTL_GM_PIPELINE_REMOTE) == 0 );
            pipeline->remote_memory = hdr->registered_memory;
            pipeline->flags |= PTL_GM_PIPELINE_REMOTE;
            mca_ptl_gm_receiver_advance_pipeline( frag, 0 );
        }
    }
#else
    assert( 0 );
#endif  /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */
    return NULL;
}
/*
 * Sender-side handler for a FIN control message coming back from the
 * receiver.  The FIN header carries (in hdr_src_ptr) the pointer to the
 * send fragment we attached when the original message went out, so we can
 * recover our local state directly from the header.
 *
 * Depending on the PTL_FLAG_GM_REQUIRE_LOCK flag this either advances the
 * RDMA GET pipeline (registered-memory path) or falls back to bursting the
 * remaining data with regular sends.  When the whole fragment has been
 * validated the send request is progressed and the fragment returned to
 * the free list.
 *
 * Always returns NULL (the frag_management_fct_t signature requires a
 * recv-frag return value, but FIN processing never produces one).
 * NOTE(review): msg_len is unused here -- the table-driven caller passes
 * it to every handler.
 */
static mca_ptl_gm_recv_frag_t*
mca_ptl_gm_recv_frag_fin( struct mca_ptl_gm_module_t* ptl,
                          mca_ptl_base_header_t* hdr, uint32_t msg_len )
{
    mca_ptl_gm_send_frag_t* frag;

    /* hdr_src_ptr round-trips our local send fragment pointer through the peer */
    frag = (mca_ptl_gm_send_frag_t*)hdr->hdr_ack.hdr_src_ptr.pval;

    /* Refresh the cached header with what the receiver told us: its flags
     * and the matched destination request handle. */
    frag->frag_send.frag_base.frag_header.hdr_common.hdr_flags = hdr->hdr_common.hdr_flags;
    frag->frag_send.frag_base.frag_header.hdr_ack.hdr_dst_match = hdr->hdr_ack.hdr_dst_match;
    frag->frag_send.frag_request->req_peer_match = hdr->hdr_ack.hdr_dst_match;

    if( PTL_FLAG_GM_REQUIRE_LOCK & hdr->hdr_common.hdr_flags ) {
#if OMPI_MCA_PTL_GM_HAVE_RDMA_GET
        if( 0 == hdr->hdr_ack.hdr_dst_size ) {
            DO_DEBUG( opal_output( 0, "sender %d %p get FIN message (initial)", orte_process_info.my_name->vpid, frag ); );
            /* I just receive the ack for the first fragment => setup the pipeline */
            mca_ptl_gm_sender_advance_pipeline( frag );
        } else {
            /* mark the memory as ready to be deregistered */
            frag->pipeline.lines[frag->pipeline.pos_deregister].flags |= PTL_GM_PIPELINE_DEREGISTER;
            DO_DEBUG( opal_output( 0, "sender %d %p get FIN message (%d)", orte_process_info.my_name->vpid, frag, frag->pipeline.pos_deregister ); );
        }
        /* continue the pipeline ... send the next segment */
        mca_ptl_gm_sender_advance_pipeline( frag );
#else
        assert( 0 );
#endif  /* OMPI_MCA_PTL_GM_HAVE_RDMA_GET */
    } else {
        DO_DEBUG( opal_output( 0, "sender %d burst data after rendez-vous protocol", orte_process_info.my_name->vpid ); );
        /* do a burst but with the remote fragment as we just get it from the message */
        mca_ptl_gm_send_burst_data( (mca_ptl_gm_peer_t*)frag->frag_send.frag_base.frag_peer, frag,
                                    frag->frag_send.frag_base.frag_size - frag->frag_bytes_validated,
                                    NULL, hdr->hdr_common.hdr_flags );
    }

    /* Everything acknowledged: progress the request and recycle the fragment. */
    if( frag->frag_send.frag_base.frag_size == frag->frag_bytes_validated ) {
        DO_DEBUG( opal_output( 0, "sender %d complete send operation", orte_process_info.my_name->vpid ); );
        ptl->super.ptl_send_progress( (mca_ptl_base_module_t*)ptl,
                                      frag->frag_send.frag_request,
                                      frag->frag_bytes_validated );
        OMPI_FREE_LIST_RETURN( &(ptl->gm_send_frags), (opal_list_item_t*)frag );
    }
    return NULL;
}
void mca_ptl_gm_outstanding_recv( struct mca_ptl_gm_module_t *ptl )
{
mca_ptl_gm_recv_frag_t * frag = NULL;
int size;
bool matched;
size = opal_list_get_size (&ptl->gm_recv_outstanding_queue);
if (size > 0) {
frag = (mca_ptl_gm_recv_frag_t *)
opal_list_remove_first( (opal_list_t *)&(ptl->gm_recv_outstanding_queue) );
matched = ptl->super.ptl_match( &(ptl->super), &(frag->frag_recv),
&(frag->frag_recv.frag_base.frag_header.hdr_match) );
if(!matched) {
opal_list_append((opal_list_t *)&(ptl->gm_recv_outstanding_queue),
(opal_list_item_t *) frag);
} else {
/* if allocated buffer, free the buffer */
/* return the recv descriptor to the free list */
OMPI_FREE_LIST_RETURN(&(ptl->gm_recv_frags_free), (opal_list_item_t *)frag);
}
}
}
/*
 * Dispatch table for incoming messages, indexed by hdr_common.hdr_type
 * (see mca_ptl_gm_analyze_recv_event).  NULL slots are header types this
 * PTL never receives.  NOTE(review): the per-slot type names below are
 * inferred from the handler names (e.g. _fin at the FIN slot) -- verify
 * against the MCA_PTL_HDR_TYPE_* enum ordering.
 */
frag_management_fct_t* frag_management_fct[MCA_PTL_HDR_TYPE_MAX] = {
    NULL, /* empty no header type equal to zero */
    NULL, /* mca_ptl_gm_recv_frag_match, */          /* MATCH: presumably unused here */
    mca_ptl_gm_recv_frag_match,                      /* RNDV (presumably) */
    (frag_management_fct_t*)mca_ptl_gm_recv_frag_frag, /* force the conversion to remove a warning */
    mca_ptl_gm_recv_frag_ctrl,                       /* ACK (presumably) */
    NULL,
    NULL,
    mca_ptl_gm_recv_frag_fin,                        /* FIN (presumably) */
    NULL };
/*
 * Decode one GM receive event and hand the embedded PTL header to the
 * handler registered for its type in frag_management_fct[].
 *
 * Returns 1 when the event is not a receive event (it is forwarded to
 * gm_unknown for GM-internal processing), 0 otherwise.  For receive
 * events the buffer is given back to the NIC at the same priority it
 * arrived on once the handler has run.
 */
int mca_ptl_gm_analyze_recv_event( struct mca_ptl_gm_module_t* ptl, gm_recv_event_t* event )
{
    mca_ptl_base_header_t* hdr = NULL;
    mca_ptl_base_header_t* recv_buffer;
    frag_management_fct_t* handler;
    uint32_t priority = GM_HIGH_PRIORITY;
    uint32_t msg_len;

    recv_buffer = (mca_ptl_base_header_t*)gm_ntohp(event->recv.buffer);

    switch( gm_ntohc(event->recv.type) ) {
    case GM_FAST_RECV_EVENT:
    case GM_FAST_PEER_RECV_EVENT:
        priority = GM_LOW_PRIORITY;
        /* fall through -- fast events differ only in priority */
    case GM_FAST_HIGH_RECV_EVENT:
    case GM_FAST_HIGH_PEER_RECV_EVENT:
        /* fast events carry the header in the message field */
        hdr = (mca_ptl_base_header_t *)gm_ntohp(event->recv.message);
        break;
    case GM_RECV_EVENT:
    case GM_PEER_RECV_EVENT:
        priority = GM_LOW_PRIORITY;
        /* fall through -- normal events differ only in priority */
    case GM_HIGH_RECV_EVENT:
    case GM_HIGH_PEER_RECV_EVENT:
        /* normal events: the header sits at the start of the receive buffer */
        hdr = recv_buffer;
        break;
    case GM_NO_RECV_EVENT:
    default:
        /* not a receive event -- let GM deal with it */
        gm_unknown( ptl->gm_port, event );
        return 1;
    }

    assert( hdr->hdr_common.hdr_type < MCA_PTL_HDR_TYPE_MAX );
    handler = frag_management_fct[hdr->hdr_common.hdr_type];
    assert( NULL != handler );

    msg_len = gm_ntohl( event->recv.length );
    (void)handler( ptl, hdr, msg_len );

    /* hand the buffer back to the NIC so it can be reused */
    gm_provide_receive_buffer( ptl->gm_port, recv_buffer, GM_SIZE, priority );
    return 0;
}
/*
 * Debugging helper: pretty-print a PTL header prefixed by the caller
 * supplied string.  Several header types share a wire layout, so they are
 * grouped onto the same output format (ack-like, match-like, frag, rndv).
 */
void mca_ptl_gm_dump_header( char* str, mca_ptl_base_header_t* hdr )
{
    switch( hdr->hdr_common.hdr_type ) {
    case MCA_PTL_HDR_TYPE_ACK:
    case MCA_PTL_HDR_TYPE_NACK:
    case MCA_PTL_HDR_TYPE_FIN:
        /* ack-layout headers */
        opal_output( 0, "%s hdr_common hdr_type %d hdr_flags %x\nack header hdr_src_ptr (lval %lld, pval %p)\n hdr_dst_match (lval %lld pval %p)\n hdr_dst_addr (lval %lld pval %p)\n hdr_dst_size %lld\n",
                     str, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags,
                     hdr->hdr_ack.hdr_src_ptr.lval, hdr->hdr_ack.hdr_src_ptr.pval,
                     hdr->hdr_ack.hdr_dst_match.lval, hdr->hdr_ack.hdr_dst_match.pval,
                     hdr->hdr_ack.hdr_dst_addr.lval, hdr->hdr_ack.hdr_dst_addr.pval, hdr->hdr_ack.hdr_dst_size );
        break;
    case MCA_PTL_HDR_TYPE_FRAG:
        /* fragment headers */
        opal_output( 0, "%s hdr_common hdr_type %d hdr_flags %x\nfrag header hdr_frag_length %lld hdr_frag_offset %lld\n hdr_src_ptr (lval %lld, pval %p)\n hdr_dst_ptr (lval %lld, pval %p)\n",
                     str, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags,
                     hdr->hdr_frag.hdr_frag_length, hdr->hdr_frag.hdr_frag_offset, hdr->hdr_frag.hdr_src_ptr.lval,
                     hdr->hdr_frag.hdr_src_ptr.pval, hdr->hdr_frag.hdr_dst_ptr.lval, hdr->hdr_frag.hdr_dst_ptr.pval );
        break;
    case MCA_PTL_HDR_TYPE_MATCH:
    case MCA_PTL_HDR_TYPE_GET:
    case MCA_PTL_HDR_TYPE_FIN_ACK:
        /* match-layout headers */
        opal_output( 0, "%s hdr_common hdr_type %d hdr_flags %x\nmatch header hdr_contextid %d hdr_src %d hdr_dst %d hdr_tag %d\n hdr_msg_length %lld hdr_msg_seq %d\n",
                     str, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags,
                     hdr->hdr_match.hdr_contextid, hdr->hdr_match.hdr_src, hdr->hdr_match.hdr_dst,
                     hdr->hdr_match.hdr_tag, hdr->hdr_match.hdr_msg_length, hdr->hdr_match.hdr_msg_seq );
        break;
    case MCA_PTL_HDR_TYPE_RNDV:
        /* rendez-vous headers (match part + fragment part) */
        opal_output( 0, "%s hdr_common hdr_type %d hdr_flags %x\nrndv header hdr_contextid %d hdr_src %d hdr_dst %d hdr_tag %d\n hdr_msg_length %lld hdr_msg_seq %d\n hdr_frag_length %lld hdr_src_ptr (lval %lld, pval %p)\n",
                     str, hdr->hdr_common.hdr_type, hdr->hdr_common.hdr_flags,
                     hdr->hdr_rndv.hdr_match.hdr_contextid, hdr->hdr_rndv.hdr_match.hdr_src,
                     hdr->hdr_rndv.hdr_match.hdr_dst, hdr->hdr_rndv.hdr_match.hdr_tag,
                     hdr->hdr_rndv.hdr_match.hdr_msg_length, hdr->hdr_rndv.hdr_match.hdr_msg_seq,
                     hdr->hdr_rndv.hdr_frag_length, hdr->hdr_rndv.hdr_src_ptr.lval, hdr->hdr_rndv.hdr_src_ptr.pval);
        break;
    default:
        opal_output( 0, "unknown header of type %d\n", hdr->hdr_common.hdr_type );
        break;
    }
}