/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2010 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2006 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/**
 * @file
 */
#ifndef OMPI_PML_DR_RECV_REQUEST_H
#define OMPI_PML_DR_RECV_REQUEST_H

#include "ompi_config.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/pml/base/pml_base_recvreq.h"

#include "pml_dr.h"
#include "pml_dr_hdr.h"
#include "pml_dr_vfrag.h"
#include "pml_dr_comm.h"

BEGIN_C_DECLS

struct mca_pml_dr_recv_request_t {
    mca_pml_base_recv_request_t req_recv;
    size_t req_bytes_received;
    size_t req_bytes_delivered;
    bool req_acked;

    /* filled in after match */
    struct mca_pml_dr_comm_proc_t* req_proc;
    struct mca_pml_dr_endpoint_t* req_endpoint;
    opal_mutex_t* req_mutex;

    /* vfrag state */
    mca_pml_dr_vfrag_t* req_vfrag;
    mca_pml_dr_vfrag_t req_vfrag0;
    opal_list_t req_vfrags;
};
typedef struct mca_pml_dr_recv_request_t mca_pml_dr_recv_request_t;

OBJ_CLASS_DECLARATION(mca_pml_dr_recv_request_t);

/**
 * Allocate a recv request from the module's free list.
 *
 * @param rc (OUT) OMPI_SUCCESS or error status on failure.
 * @return Receive request.
 */
#define MCA_PML_DR_RECV_REQUEST_ALLOC(recvreq, rc)                      \
    do {                                                                \
        ompi_free_list_item_t* item;                                    \
        rc = OMPI_SUCCESS;                                              \
        OMPI_FREE_LIST_GET(&mca_pml_base_recv_requests, item, rc);      \
        recvreq = (mca_pml_dr_recv_request_t*)item;                     \
    } while(0)
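
/*
 * A minimal usage sketch, assuming the caller treats a failed get as
 * out-of-resource (variable names are illustrative, not from this file):
 *
 *   mca_pml_dr_recv_request_t* recvreq;
 *   int rc;
 *   MCA_PML_DR_RECV_REQUEST_ALLOC(recvreq, rc);
 *   if (OMPI_SUCCESS != rc || NULL == recvreq) {
 *       return rc;
 *   }
 */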

/**
 * Initialize a receive request with call parameters.
 *
 * @param request (IN)    Receive request.
 * @param addr (IN)       User buffer.
 * @param count (IN)      Number of elements of indicated datatype.
 * @param datatype (IN)   User defined datatype.
 * @param src (IN)        Source rank within the communicator.
 * @param tag (IN)        User defined tag.
 * @param comm (IN)       Communicator.
 * @param persistent (IN) Is this a persistent request?
 */
#define MCA_PML_DR_RECV_REQUEST_INIT(   \
    request,                            \
    addr,                               \
    count,                              \
    datatype,                           \
    src,                                \
    tag,                                \
    comm,                               \
    persistent)                         \
    do {                                \
        MCA_PML_BASE_RECV_REQUEST_INIT( \
            &(request)->req_recv,       \
            addr,                       \
            count,                      \
            datatype,                   \
            src,                        \
            tag,                        \
            comm,                       \
            persistent);                \
    } while(0)
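
/*
 * A sketch of how the allocation, initialization, and start macros
 * compose, e.g. inside a receive entry point.  The function context and
 * argument names are hypothetical; the START macro is defined further
 * below in this file.
 *
 *   mca_pml_dr_recv_request_t* recvreq;
 *   int rc;
 *   MCA_PML_DR_RECV_REQUEST_ALLOC(recvreq, rc);
 *   if (OMPI_SUCCESS != rc) return rc;
 *   MCA_PML_DR_RECV_REQUEST_INIT(recvreq, addr, count, datatype,
 *                                src, tag, comm, false);
 *   MCA_PML_DR_RECV_REQUEST_START(recvreq);
 */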

/**
 * Mark a recv request complete.
 *
 * @param request (IN) Receive request.
 */
#define MCA_PML_DR_RECV_REQUEST_PML_COMPLETE(recvreq)                          \
    do {                                                                       \
        ompi_free_list_item_t* item;                                           \
        assert( false == recvreq->req_recv.req_base.req_pml_complete );        \
        OPAL_THREAD_LOCK((recvreq)->req_mutex);                                \
        while(NULL != (item = (ompi_free_list_item_t*)                         \
                opal_list_remove_first(&(recvreq)->req_vfrags))) {             \
            OMPI_FREE_LIST_RETURN(&mca_pml_dr.vfrags, item);                   \
        }                                                                      \
        OPAL_THREAD_UNLOCK((recvreq)->req_mutex);                              \
                                                                               \
        opal_list_remove_item(&(recvreq)->req_proc->matched_receives,          \
                              (opal_list_item_t*)(recvreq));                   \
                                                                               \
        /* initialize request status */                                        \
        recvreq->req_recv.req_base.req_pml_complete = true;                    \
        if (recvreq->req_bytes_received > recvreq->req_bytes_delivered) {      \
            recvreq->req_recv.req_base.req_ompi.req_status._ucount =           \
                recvreq->req_bytes_delivered;                                  \
            recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR =         \
                MPI_ERR_TRUNCATE;                                              \
        } else {                                                               \
            recvreq->req_recv.req_base.req_ompi.req_status._ucount =           \
                recvreq->req_bytes_received;                                   \
        }                                                                      \
        ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true ); \
                                                                               \
        if( true == recvreq->req_recv.req_base.req_free_called ) {             \
            MCA_PML_DR_RECV_REQUEST_RETURN( recvreq );                         \
        }                                                                      \
    } while(0)
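
/*
 * Illustrative (not authoritative) call site: the progress path would
 * invoke the completion macro only once all expected bytes have arrived
 * and the request has been acked.  A hedged sketch:
 *
 *   if (recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed
 *       && recvreq->req_acked) {
 *       MCA_PML_DR_RECV_REQUEST_PML_COMPLETE(recvreq);
 *   }
 */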

/**
 * Return a recv request to the module's free list.
 *
 * @param request (IN) Receive request.
 */
#define MCA_PML_DR_RECV_REQUEST_RETURN(recvreq)                   \
    do {                                                          \
        /* decrement reference counts */                          \
        MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv);     \
        OMPI_FREE_LIST_RETURN(&mca_pml_base_recv_requests,        \
                              (ompi_free_list_item_t*)(recvreq)); \
    } while(0)

/**
 * Attempt to match the request against the unexpected fragment list
 * for all source ranks within the communicator.
 *
 * @param request (IN) Request to match.
 */
void mca_pml_dr_recv_request_match_wild(mca_pml_dr_recv_request_t* request);

/**
 * Attempt to match the request against the unexpected fragment list
 * for a specific source rank.
 *
 * @param request (IN) Request to match.
 */
void mca_pml_dr_recv_request_match_specific(mca_pml_dr_recv_request_t* request);

/**
 * Ack a matched request.
 */
void mca_pml_dr_recv_request_ack(
    mca_btl_base_module_t* btl,
    mca_pml_dr_recv_request_t* recvreq,
    mca_pml_dr_common_hdr_t* hdr,
    ompi_ptr_t src_ptr,
    size_t vlen,
    uint64_t vmask);

/**
 * Start an initialized request.
 *
 * @param request Receive request.
 * @return OMPI_SUCCESS or error status on failure.
 */
#define MCA_PML_DR_RECV_REQUEST_START(request)                                      \
    do {                                                                            \
        /* init/re-init the request */                                              \
        (request)->req_bytes_received = 0;                                          \
        (request)->req_bytes_delivered = 0;                                         \
        (request)->req_acked = false;                                               \
        (request)->req_recv.req_base.req_pml_complete = false;                      \
        (request)->req_recv.req_base.req_ompi.req_complete = false;                 \
        (request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE;      \
        (request)->req_vfrag = &(request)->req_vfrag0;                              \
        (request)->req_proc = NULL;                                                 \
        (request)->req_endpoint = NULL;                                             \
                                                                                    \
        /* always set the req_status.MPI_TAG to ANY_TAG before starting the        \
         * request.  This field is used if cancelled to find out if the request    \
         * has been matched or not.                                                \
         */                                                                         \
        (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = OMPI_ANY_TAG;    \
        (request)->req_recv.req_base.req_ompi.req_status.MPI_ERROR = OMPI_SUCCESS;  \
        (request)->req_recv.req_base.req_ompi.req_status._cancelled = 0;            \
                                                                                    \
        /* attempt to match unexpected recv */                                      \
        if((request)->req_recv.req_base.req_peer == OMPI_ANY_SOURCE) {              \
            mca_pml_dr_recv_request_match_wild(request);                            \
        } else {                                                                    \
            mca_pml_dr_recv_request_match_specific(request);                        \
        }                                                                           \
    } while (0)
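
/*
 * Because MPI_TAG is primed to OMPI_ANY_TAG above, a cancellation path
 * can tell whether the request has been matched without extra state.
 * A minimal sketch (surrounding cancel logic elided):
 *
 *   if (OMPI_ANY_TAG ==
 *       (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG) {
 *       // not yet matched: safe to remove from the posted queue
 *   }
 */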

/**
 * Initialize request when match is made.
 */
#define MCA_PML_DR_RECV_REQUEST_MATCHED(request,comm,proc,hdr)                          \
    do {                                                                                \
        (request)->req_mutex = &comm->matching_lock;                                    \
        (request)->req_proc = proc;                                                     \
        (request)->req_endpoint = (mca_pml_dr_endpoint_t*)proc->ompi_proc->proc_pml;    \
        (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag;      \
        (request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = proc->comm_rank;  \
        (request)->req_vfrag0.vf_id = (hdr)->hdr_common.hdr_vid;                        \
        opal_list_append(&proc->matched_receives, (opal_list_item_t*)request);          \
        ompi_seq_tracker_insert(&request->req_endpoint->seq_recvs_matched,              \
                                (hdr)->hdr_common.hdr_vid);                             \
    } while(0)
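
/*
 * Recording hdr_vid in seq_recvs_matched lets duplicate MATCH headers be
 * recognized later.  A hedged sketch of such a check (the exact call
 * site is in the fragment-receive path, not shown in this header):
 *
 *   if (ompi_seq_tracker_check_duplicate(&endpoint->seq_recvs_matched,
 *                                        hdr->hdr_common.hdr_vid)) {
 *       // duplicate match header: ack again or drop, do not re-match
 *   }
 */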

/**
 * Set up the convertor if the message length is non-zero.
 */
#define MCA_PML_DR_RECV_REQUEST_BYTES_PACKED(request, bytes_packed)                         \
    do {                                                                                    \
        bool do_csum = mca_pml_dr.enable_csum &&                                            \
            (request->req_endpoint->bml_endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM);  \
        (request)->req_recv.req_bytes_packed = bytes_packed;                                \
        if((request)->req_recv.req_bytes_packed != 0) {                                     \
            ompi_proc_t *proc = (request)->req_proc->ompi_proc;                             \
            opal_convertor_copy_and_prepare_for_recv( proc->proc_convertor,                 \
                &((request)->req_recv.req_base.req_datatype->super),                        \
                (request)->req_recv.req_base.req_count,                                     \
                (request)->req_recv.req_base.req_addr,                                      \
                (do_csum ? CONVERTOR_WITH_CHECKSUM: 0),                                     \
                &(request)->req_recv.req_base.req_convertor );                              \
        }                                                                                   \
    } while (0)

/**
 * Unpack the segment payload into the application buffer, updating
 * bytes_delivered and computing the (optional) checksum.
 */
#define MCA_PML_DR_RECV_REQUEST_UNPACK(                                                         \
    request,                                                                                    \
    segments,                                                                                   \
    num_segments,                                                                               \
    seg_offset,                                                                                 \
    data_offset,                                                                                \
    bytes_received,                                                                             \
    bytes_delivered,                                                                            \
    csum)                                                                                       \
    do {                                                                                        \
        if(request->req_recv.req_bytes_packed > 0) {                                            \
            struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS];                                         \
            uint32_t iov_count = 0;                                                             \
            size_t max_data = bytes_received;                                                   \
            size_t n, offset = seg_offset;                                                      \
            bool do_csum = mca_pml_dr.enable_csum &&                                            \
                (request->req_endpoint->bml_endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM);  \
                                                                                                \
            for(n=0; n<num_segments; n++) {                                                     \
                mca_btl_base_segment_t* segment = segments+n;                                   \
                if(offset >= segment->seg_len) {                                                \
                    offset -= segment->seg_len;                                                 \
                } else {                                                                        \
                    iov[iov_count].iov_len = segment->seg_len - offset;                         \
                    iov[iov_count].iov_base = (IOVBASE_TYPE*)                                   \
                        ((unsigned char*)segment->seg_addr.pval + offset);                      \
                    offset = 0;                                                                 \
                    iov_count++;                                                                \
                }                                                                               \
            }                                                                                   \
            opal_convertor_set_position( &(request->req_recv.req_base.req_convertor),           \
                                         &data_offset);                                         \
            assert((request->req_recv.req_base.req_convertor.flags & CONVERTOR_COMPLETED) == 0); \
            opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor,                 \
                                   iov,                                                         \
                                   &iov_count,                                                  \
                                   &max_data);                                                  \
            bytes_delivered = max_data;                                                         \
            /* a non-empty fragment must deliver at least one byte */                           \
            assert(!(bytes_received && !bytes_delivered));                                      \
            csum = (do_csum ?                                                                   \
                    request->req_recv.req_base.req_convertor.checksum : OPAL_CSUM_ZERO);        \
        } else {                                                                                \
            bytes_delivered = 0;                                                                \
            csum = OPAL_CSUM_ZERO;                                                              \
        }                                                                                       \
    } while (0)
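
/*
 * Illustrative invocation from a fragment callback.  The variable names
 * and the header-sized segment offset are assumptions for the sketch,
 * not taken from this file:
 *
 *   size_t bytes_delivered;
 *   uint32_t csum;
 *   MCA_PML_DR_RECV_REQUEST_UNPACK(recvreq, segments, num_segments,
 *                                  sizeof(mca_pml_dr_frag_hdr_t),
 *                                  hdr->hdr_frag_offset, bytes_received,
 *                                  bytes_delivered, csum);
 */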

/**
 * Make progress on a matched receive as fragments arrive.
 */
void mca_pml_dr_recv_request_progress(
    mca_pml_dr_recv_request_t* req,
    struct mca_btl_base_module_t* btl,
    mca_btl_base_segment_t* segments,
    size_t num_segments);

/**
 * Update a probe request that matched an unexpected fragment.
 */
void mca_pml_dr_recv_request_matched_probe(
    mca_pml_dr_recv_request_t* req,
    struct mca_btl_base_module_t* btl,
    mca_btl_base_segment_t* segments,
    size_t num_segments);

/**
 * Schedule further processing of the receive request.
 */
void mca_pml_dr_recv_request_schedule(
    mca_pml_dr_recv_request_t* req);

/**
 * Look for a matched receive.
 * Must be called with the matching lock held.
 */
static inline struct mca_pml_dr_recv_request_t* mca_pml_dr_comm_proc_check_matched(
    mca_pml_dr_comm_proc_t* dr_proc,
    uint32_t vfrag_id)
{
    opal_list_item_t* item;
    for(item = opal_list_get_first(&dr_proc->matched_receives);
        item != opal_list_get_end(&dr_proc->matched_receives);
        item = opal_list_get_next(item)) {
        struct mca_pml_dr_recv_request_t* recvreq = (struct mca_pml_dr_recv_request_t*)item;
        if(recvreq->req_vfrag0.vf_id == vfrag_id)
            return recvreq;
    }
    return NULL;
}
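
/*
 * A minimal usage sketch; the matching lock must be held across the
 * scan (comm, proc, and hdr are assumed to be in scope):
 *
 *   OPAL_THREAD_LOCK(&comm->matching_lock);
 *   mca_pml_dr_recv_request_t* match =
 *       mca_pml_dr_comm_proc_check_matched(proc, hdr->hdr_common.hdr_vid);
 *   OPAL_THREAD_UNLOCK(&comm->matching_lock);
 */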

/*
 * Look up (or create) the vfrag that tracks the virtual fragment
 * referenced by the header.
 */
#define MCA_PML_DR_RECV_REQUEST_VFRAG_LOOKUP(recvreq,hdr,vfrag)                         \
    do {                                                                                \
        if((recvreq)->req_vfrag->vf_id == (hdr)->hdr_common.hdr_vid) {                  \
            vfrag = (recvreq)->req_vfrag;                                               \
        } else {                                                                        \
            opal_list_item_t* item;                                                     \
            int rc;                                                                     \
                                                                                        \
            vfrag = NULL;                                                               \
            OPAL_THREAD_LOCK(recvreq->req_mutex);                                       \
            for(item = opal_list_get_first(&(recvreq)->req_vfrags);                     \
                item != opal_list_get_end(&(recvreq)->req_vfrags);                      \
                item = opal_list_get_next(item)) {                                      \
                mca_pml_dr_vfrag_t* vf = (mca_pml_dr_vfrag_t*)item;                     \
                if(vf->vf_id == (hdr)->hdr_common.hdr_vid) {                            \
                    vfrag = vf;                                                         \
                    break;                                                              \
                }                                                                       \
            }                                                                           \
            if(NULL == vfrag) {                                                         \
                MCA_PML_DR_VFRAG_ALLOC(vfrag,rc);                                       \
                if(NULL != vfrag) {                                                     \
                    MCA_PML_DR_VFRAG_INIT(vfrag);                                       \
                    (vfrag)->vf_id = (hdr)->hdr_common.hdr_vid;                         \
                    (vfrag)->vf_len = (hdr)->hdr_vlen;                                  \
                    if((hdr)->hdr_vlen == 64) {                                         \
                        /* avoid the undefined 64-bit shift below */                    \
                        (vfrag)->vf_mask = ~(uint64_t)0;                                \
                    } else {                                                            \
                        (vfrag)->vf_mask = (((uint64_t)1 << (hdr)->hdr_vlen)-1);        \
                    }                                                                   \
                    opal_list_append(&(recvreq)->req_vfrags, (opal_list_item_t*)vfrag); \
                    (recvreq)->req_vfrag = vfrag;                                       \
                }                                                                       \
            }                                                                           \
            OPAL_THREAD_UNLOCK(recvreq->req_mutex);                                     \
        }                                                                               \
    } while(0)
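
/*
 * Worked example of the mask computation above: for hdr_vlen == 3 the
 * vfrag expects fragments 0..2, so vf_mask == ((uint64_t)1 << 3) - 1
 * == 0x7.  The hdr_vlen == 64 case is special-cased to the all-ones
 * mask because shifting a 64-bit value by 64 is undefined behavior in C.
 */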

END_C_DECLS

#endif