pml/ob1: update for BTL 3.0 interface
Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
44fb8369ff
Коммит
c4a0e02261
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -500,17 +500,17 @@ static void mca_pml_ob1_dump_hdr(mca_pml_ob1_hdr_t* hdr)
|
||||
case MCA_PML_OB1_HDR_TYPE_RGET:
|
||||
type = "RGET";
|
||||
snprintf( header, 128, "ctx %5d src %d tag %d seq %d msg_length %" PRIu64
|
||||
"seg_cnt %d hdr_des %" PRIu64,
|
||||
"frag %" PRIu64 " src_ptr %" PRIu64,
|
||||
hdr->hdr_rndv.hdr_match.hdr_ctx, hdr->hdr_rndv.hdr_match.hdr_src,
|
||||
hdr->hdr_rndv.hdr_match.hdr_tag, hdr->hdr_rndv.hdr_match.hdr_seq,
|
||||
hdr->hdr_rndv.hdr_msg_length,
|
||||
hdr->hdr_rget.hdr_seg_cnt, hdr->hdr_rget.hdr_des.lval);
|
||||
hdr->hdr_rndv.hdr_msg_length, hdr->hdr_rget.hdr_frag.lval,
|
||||
hdr->hdr_rget.hdr_src_ptr);
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_ACK:
|
||||
type = "ACK";
|
||||
snprintf( header, 128, "src_req %p dst_req %p offset %" PRIu64,
|
||||
snprintf( header, 128, "src_req %p dst_req %p offset %" PRIu64 " size %" PRIu64,
|
||||
hdr->hdr_ack.hdr_src_req.pval, hdr->hdr_ack.hdr_dst_req.pval,
|
||||
hdr->hdr_ack.hdr_send_offset);
|
||||
hdr->hdr_ack.hdr_send_offset, hdr->hdr_ack.hdr_send_size);
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_FRAG:
|
||||
type = "FRAG";
|
||||
@ -520,10 +520,11 @@ static void mca_pml_ob1_dump_hdr(mca_pml_ob1_hdr_t* hdr)
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_PUT:
|
||||
type = "PUT";
|
||||
snprintf( header, 128, "seg_cnt %d dst_req %p src_des %p recv_req %p offset %" PRIu64 " [%p %" PRIu64 "]",
|
||||
hdr->hdr_rdma.hdr_seg_cnt, hdr->hdr_rdma.hdr_req.pval, hdr->hdr_rdma.hdr_des.pval,
|
||||
snprintf( header, 128, "dst_req %p src_frag %p recv_req %p offset %" PRIu64
|
||||
" dst_ptr %" PRIu64 " dst_size %" PRIu64,
|
||||
hdr->hdr_rdma.hdr_req.pval, hdr->hdr_rdma.hdr_frag.pval,
|
||||
hdr->hdr_rdma.hdr_recv_req.pval, hdr->hdr_rdma.hdr_rdma_offset,
|
||||
hdr->hdr_rdma.hdr_segs[0].seg_addr.pval, hdr->hdr_rdma.hdr_segs[0].seg_len);
|
||||
hdr->hdr_rdma.hdr_dst_ptr, hdr->hdr_rdma.hdr_dst_size);
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_FIN:
|
||||
type = "FIN";
|
||||
@ -638,37 +639,32 @@ static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl,
|
||||
*/
|
||||
int mca_pml_ob1_send_fin( ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
opal_ptr_t hdr_des,
|
||||
opal_ptr_t hdr_frag,
|
||||
uint64_t rdma_size,
|
||||
uint8_t order,
|
||||
uint32_t status )
|
||||
int status )
|
||||
{
|
||||
mca_btl_base_descriptor_t* fin;
|
||||
mca_pml_ob1_fin_hdr_t* hdr;
|
||||
int rc;
|
||||
|
||||
mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_ob1_fin_hdr_t),
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL);
|
||||
|
||||
if(NULL == fin) {
|
||||
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
|
||||
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
fin->des_cbfunc = mca_pml_ob1_fin_completion;
|
||||
fin->des_cbdata = NULL;
|
||||
|
||||
/* fill in header */
|
||||
hdr = (mca_pml_ob1_fin_hdr_t*)fin->des_local->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN;
|
||||
hdr->hdr_des = hdr_des;
|
||||
hdr->hdr_fail = status;
|
||||
mca_pml_ob1_fin_hdr_prepare ((mca_pml_ob1_fin_hdr_t *) fin->des_segments->seg_addr.pval,
|
||||
0, hdr_frag.lval, status ? status : (int64_t) rdma_size);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_FIN, proc);
|
||||
|
||||
/* queue request */
|
||||
rc = mca_bml_base_send( bml_btl,
|
||||
fin,
|
||||
MCA_PML_OB1_HDR_TYPE_FIN );
|
||||
rc = mca_bml_base_send( bml_btl, fin, MCA_PML_OB1_HDR_TYPE_FIN );
|
||||
if( OPAL_LIKELY( rc >= 0 ) ) {
|
||||
if( OPAL_LIKELY( 1 == rc ) ) {
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
@ -676,7 +672,7 @@ int mca_pml_ob1_send_fin( ompi_proc_t* proc,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
mca_bml_base_free(bml_btl, fin);
|
||||
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
|
||||
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -717,6 +713,7 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
||||
pckt->hdr.hdr_ack.hdr_src_req.lval,
|
||||
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
||||
pckt->hdr.hdr_ack.hdr_send_offset,
|
||||
pckt->hdr.hdr_ack.hdr_send_size,
|
||||
pckt->hdr.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA);
|
||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
@ -728,9 +725,10 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_FIN:
|
||||
rc = mca_pml_ob1_send_fin(pckt->proc, send_dst,
|
||||
pckt->hdr.hdr_fin.hdr_des,
|
||||
pckt->hdr.hdr_fin.hdr_frag,
|
||||
pckt->hdr.hdr_fin.hdr_size,
|
||||
pckt->order,
|
||||
pckt->hdr.hdr_fin.hdr_fail);
|
||||
pckt->status);
|
||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
||||
return;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -216,6 +216,7 @@ struct mca_pml_ob1_pckt_pending_t {
|
||||
mca_pml_ob1_hdr_t hdr;
|
||||
struct mca_bml_base_btl_t *bml_btl;
|
||||
uint8_t order;
|
||||
int status;
|
||||
};
|
||||
typedef struct mca_pml_ob1_pckt_pending_t mca_pml_ob1_pckt_pending_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pml_ob1_pckt_pending_t);
|
||||
@ -234,17 +235,17 @@ do { \
|
||||
(ompi_free_list_item_t*)pckt); \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, B, O, S) \
|
||||
#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, Sz, B, O, S) \
|
||||
do { \
|
||||
mca_pml_ob1_pckt_pending_t *_pckt; \
|
||||
\
|
||||
MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt); \
|
||||
_pckt->hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN; \
|
||||
_pckt->hdr.hdr_fin.hdr_des = (D); \
|
||||
_pckt->hdr.hdr_fin.hdr_fail = (S); \
|
||||
mca_pml_ob1_fin_hdr_prepare (&_pckt->hdr.hdr_fin, 0, \
|
||||
(D).lval, (Sz)); \
|
||||
_pckt->proc = (P); \
|
||||
_pckt->bml_btl = (B); \
|
||||
_pckt->order = (O); \
|
||||
_pckt->status = (S); \
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock); \
|
||||
opal_list_append(&mca_pml_ob1.pckt_pending, \
|
||||
(opal_list_item_t*)_pckt); \
|
||||
@ -253,7 +254,7 @@ do { \
|
||||
|
||||
|
||||
int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||
opal_ptr_t hdr_des, uint8_t order, uint32_t status);
|
||||
opal_ptr_t hdr_frag, uint64_t size, uint8_t order, int status);
|
||||
|
||||
/* This function tries to resend FIN/ACK packets from pckt_pending queue.
|
||||
* Packets are added to the queue when sending of FIN or ACK is failed due to
|
||||
@ -283,20 +284,6 @@ void mca_pml_ob1_process_pending_rdma(void);
|
||||
/*
|
||||
* Compute the total number of bytes on supplied descriptor
|
||||
*/
|
||||
static inline size_t
|
||||
mca_pml_ob1_compute_segment_length(size_t seg_size, void *segments,
|
||||
size_t count, size_t hdrlen)
|
||||
{
|
||||
size_t i, length = 0;
|
||||
mca_btl_base_segment_t *segment = (mca_btl_base_segment_t*)segments;
|
||||
|
||||
for (i = 0; i < count ; ++i) {
|
||||
length += segment->seg_len;
|
||||
segment = (mca_btl_base_segment_t *)((char *)segment + seg_size);
|
||||
}
|
||||
return (length - hdrlen);
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
mca_pml_ob1_compute_segment_length_base(mca_btl_base_segment_t *segments,
|
||||
size_t count, size_t hdrlen)
|
||||
@ -338,7 +325,7 @@ mca_pml_ob1_compute_segment_length_remote (size_t seg_size, void *segments,
|
||||
/* represent BTL chosen for sending request */
|
||||
struct mca_pml_ob1_com_btl_t {
|
||||
mca_bml_base_btl_t *bml_btl;
|
||||
struct mca_mpool_base_registration_t* btl_reg;
|
||||
struct mca_btl_base_registration_handle_t *btl_reg;
|
||||
size_t length;
|
||||
};
|
||||
typedef struct mca_pml_ob1_com_btl_t mca_pml_ob1_com_btl_t;
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -12,6 +13,8 @@
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -123,19 +126,20 @@ size_t mca_pml_ob1_rdma_cuda_btls(
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n);
|
||||
|
||||
if (bml_btl->btl_flags & MCA_BTL_FLAGS_CUDA_GET) {
|
||||
mca_mpool_base_registration_t* reg = NULL;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;
|
||||
mca_btl_base_registration_handle_t *handle = NULL;
|
||||
|
||||
if( NULL != btl_mpool ) {
|
||||
if( NULL != bml_btl->btl->btl_register_mem ) {
|
||||
/* register the memory */
|
||||
btl_mpool->mpool_register(btl_mpool, base, size, MCA_MPOOL_FLAGS_CUDA_GPU_MEM, &reg);
|
||||
handle = bml_btl->btl->btl_register_mem (bml_btl->btl, bml_btl->btl_endpoint,
|
||||
base, size, MCA_BTL_REG_FLAG_CUDA_GPU_MEM |
|
||||
MCA_BTL_REG_FLAG_REMOTE_READ);
|
||||
}
|
||||
|
||||
if(NULL == reg)
|
||||
if(NULL == handle)
|
||||
continue;
|
||||
|
||||
rdma_btls[num_btls_used].bml_btl = bml_btl;
|
||||
rdma_btls[num_btls_used].btl_reg = reg;
|
||||
rdma_btls[num_btls_used].btl_reg = handle;
|
||||
weight_total += bml_btl->btl_weight;
|
||||
num_btls_used++;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -64,6 +64,13 @@ struct mca_pml_ob1_common_hdr_t {
|
||||
};
|
||||
typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_common_hdr_prepare (mca_pml_ob1_common_hdr_t *hdr, uint8_t hdr_type,
|
||||
uint8_t hdr_flags)
|
||||
{
|
||||
hdr->hdr_type = hdr_type;
|
||||
hdr->hdr_flags = hdr_flags;
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_COMMON_HDR_NTOH(h)
|
||||
#define MCA_PML_OB1_COMMON_HDR_HTON(h)
|
||||
|
||||
@ -89,15 +96,19 @@ struct mca_pml_ob1_match_hdr_t {
|
||||
|
||||
typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_match_hdr_prepare (mca_pml_ob1_match_hdr_t *hdr, uint8_t hdr_type, uint8_t hdr_flags,
|
||||
uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, hdr_type, hdr_flags);
|
||||
hdr->hdr_ctx = hdr_ctx;
|
||||
hdr->hdr_src = hdr_src;
|
||||
hdr->hdr_tag = hdr_tag;
|
||||
hdr->hdr_seq = hdr_seq;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_MATCH_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_OB1_MATCH_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_MATCH_HDR_NTOH(h) \
|
||||
do { \
|
||||
@ -111,7 +122,6 @@ do { \
|
||||
#define MCA_PML_OB1_MATCH_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_MATCH_HDR_FILL(h); \
|
||||
(h).hdr_ctx = htons((h).hdr_ctx); \
|
||||
(h).hdr_src = htonl((h).hdr_src); \
|
||||
(h).hdr_tag = htonl((h).hdr_tag); \
|
||||
@ -130,12 +140,14 @@ struct mca_pml_ob1_rendezvous_hdr_t {
|
||||
};
|
||||
typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_RNDV_HDR_FILL(h) \
|
||||
MCA_PML_OB1_MATCH_HDR_FILL((h).hdr_match)
|
||||
#else
|
||||
#define MCA_PML_OB1_RNDV_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
static inline void mca_pml_ob1_rendezvous_hdr_prepare (mca_pml_ob1_rendezvous_hdr_t *hdr, uint8_t hdr_type, uint8_t hdr_flags,
|
||||
uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq,
|
||||
uint64_t hdr_msg_length, void *hdr_src_req)
|
||||
{
|
||||
mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, hdr_type, hdr_flags, hdr_ctx, hdr_src, hdr_tag, hdr_seq);
|
||||
hdr->hdr_msg_length = hdr_msg_length;
|
||||
hdr->hdr_src_req.pval = hdr_src_req;
|
||||
}
|
||||
|
||||
/* Note that hdr_src_req is not put in network byte order because it
|
||||
is never processed by the receiver, other than being copied into
|
||||
@ -149,7 +161,6 @@ typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||
#define MCA_PML_OB1_RNDV_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_MATCH_HDR_HTON((h).hdr_match); \
|
||||
MCA_PML_OB1_RNDV_HDR_FILL(h); \
|
||||
(h).hdr_msg_length = hton64((h).hdr_msg_length); \
|
||||
} while (0)
|
||||
|
||||
@ -158,38 +169,47 @@ typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||
*/
|
||||
struct mca_pml_ob1_rget_hdr_t {
|
||||
mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[4];
|
||||
#endif
|
||||
opal_ptr_t hdr_des; /**< source descriptor */
|
||||
opal_ptr_t hdr_frag; /**< source fragment (for fin) */
|
||||
uint64_t hdr_src_ptr; /**< source pointer */
|
||||
/* btl registration handle data follows */
|
||||
};
|
||||
typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_rget_hdr_prepare (mca_pml_ob1_rget_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq,
|
||||
uint64_t hdr_msg_length, void *hdr_src_req, void *hdr_frag,
|
||||
void *hdr_src_ptr, void *local_handle, size_t local_handle_size)
|
||||
{
|
||||
mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RGET, hdr_flags,
|
||||
hdr_ctx, hdr_src, hdr_tag, hdr_seq, hdr_msg_length, hdr_src_req);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_RGET_HDR_FILL(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_FILL((h).hdr_rndv); \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_OB1_RGET_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
hdr->hdr_padding[2] = 0;
|
||||
hdr->hdr_padding[3] = 0;
|
||||
#endif
|
||||
hdr->hdr_frag.pval = hdr_frag;
|
||||
hdr->hdr_src_ptr = (uint64_t)(intptr_t) hdr_src_ptr;
|
||||
|
||||
#define MCA_PML_OB1_RGET_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_NTOH((h).hdr_rndv); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
/* copy registration handle */
|
||||
memcpy (hdr + 1, local_handle, local_handle_size);
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_RGET_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_NTOH((h).hdr_rndv); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
(h).hdr_src_ptr = ntoh64((h).hdr_src_ptr); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_OB1_RGET_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_HTON((h).hdr_rndv); \
|
||||
MCA_PML_OB1_RGET_HDR_FILL(h); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
#define MCA_PML_OB1_RGET_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_HTON((h).hdr_rndv); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
(h).hdr_src_ptr = hton64((h).hdr_src_ptr); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
@ -206,19 +226,23 @@ struct mca_pml_ob1_frag_hdr_t {
|
||||
};
|
||||
typedef struct mca_pml_ob1_frag_hdr_t mca_pml_ob1_frag_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_frag_hdr_prepare (mca_pml_ob1_frag_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint64_t hdr_frag_offset, void *hdr_src_req,
|
||||
uint64_t hdr_dst_req)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_FRAG, hdr_flags);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_FRAG_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
(h).hdr_padding[4] = 0; \
|
||||
(h).hdr_padding[5] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_OB1_FRAG_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
hdr->hdr_padding[2] = 0;
|
||||
hdr->hdr_padding[3] = 0;
|
||||
hdr->hdr_padding[4] = 0;
|
||||
hdr->hdr_padding[5] = 0;
|
||||
#endif
|
||||
hdr->hdr_frag_offset = hdr_frag_offset;
|
||||
hdr->hdr_src_req.pval = hdr_src_req;
|
||||
hdr->hdr_dst_req.lval = hdr_dst_req;
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_FRAG_HDR_NTOH(h) \
|
||||
do { \
|
||||
@ -229,7 +253,6 @@ do { \
|
||||
#define MCA_PML_OB1_FRAG_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_FRAG_HDR_FILL(h); \
|
||||
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
|
||||
} while (0)
|
||||
|
||||
@ -245,38 +268,45 @@ struct mca_pml_ob1_ack_hdr_t {
|
||||
opal_ptr_t hdr_src_req; /**< source request */
|
||||
opal_ptr_t hdr_dst_req; /**< matched receive request */
|
||||
uint64_t hdr_send_offset; /**< starting point of copy in/out */
|
||||
uint64_t hdr_send_size; /**< number of bytes requested (0: all remaining) */
|
||||
};
|
||||
typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_ack_hdr_prepare (mca_pml_ob1_ack_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint64_t hdr_src_req, void *hdr_dst_req,
|
||||
uint64_t hdr_send_offset, uint64_t hdr_send_size)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_ACK, hdr_flags);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_ACK_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
(h).hdr_padding[4] = 0; \
|
||||
(h).hdr_padding[5] = 0; \
|
||||
} while (0)
|
||||
#else
|
||||
#define MCA_PML_OB1_ACK_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
hdr->hdr_padding[2] = 0;
|
||||
hdr->hdr_padding[3] = 0;
|
||||
hdr->hdr_padding[4] = 0;
|
||||
hdr->hdr_padding[5] = 0;
|
||||
#endif
|
||||
hdr->hdr_src_req.lval = hdr_src_req;
|
||||
hdr->hdr_dst_req.pval = hdr_dst_req;
|
||||
hdr->hdr_send_offset = hdr_send_offset;
|
||||
hdr->hdr_send_size = hdr_send_size;
|
||||
}
|
||||
|
||||
/* Note that the request headers are not put in NBO because the
|
||||
src_req is already in receiver's byte order and the dst_req is not
|
||||
used by the receiver for anything other than backpointers in return
|
||||
headers */
|
||||
#define MCA_PML_OB1_ACK_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
#define MCA_PML_OB1_ACK_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_send_offset = ntoh64((h).hdr_send_offset); \
|
||||
(h).hdr_send_size = ntoh64((h).hdr_send_size); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_OB1_ACK_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_ACK_HDR_FILL(h); \
|
||||
#define MCA_PML_OB1_ACK_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
(h).hdr_send_offset = hton64((h).hdr_send_offset); \
|
||||
(h).hdr_send_size = hton64((h).hdr_send_size); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
@ -288,38 +318,55 @@ struct mca_pml_ob1_rdma_hdr_t {
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */
|
||||
#endif
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
/* TODO: add real support for multiple destination segments */
|
||||
opal_ptr_t hdr_req; /**< destination request */
|
||||
opal_ptr_t hdr_des; /**< source descriptor */
|
||||
opal_ptr_t hdr_frag; /**< receiver fragment */
|
||||
opal_ptr_t hdr_recv_req; /**< receive request (NTH: needed for put fallback on send) */
|
||||
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
||||
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
||||
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
||||
uint64_t hdr_dst_ptr; /**< destination address */
|
||||
uint64_t hdr_dst_size; /**< destination size */
|
||||
/* registration data follows */
|
||||
};
|
||||
typedef struct mca_pml_ob1_rdma_hdr_t mca_pml_ob1_rdma_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_rdma_hdr_prepare (mca_pml_ob1_rdma_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint64_t hdr_req, void *hdr_frag, void *hdr_recv_req,
|
||||
uint64_t hdr_rdma_offset, void *hdr_dst_ptr,
|
||||
uint64_t hdr_dst_size, void *local_handle,
|
||||
size_t local_handle_size)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_PUT, hdr_flags);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_RDMA_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_OB1_RDMA_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
#endif
|
||||
hdr->hdr_req.lval = hdr_req;
|
||||
hdr->hdr_frag.pval = hdr_frag;
|
||||
hdr->hdr_recv_req.pval = hdr_recv_req;
|
||||
hdr->hdr_rdma_offset = hdr_rdma_offset;
|
||||
hdr->hdr_dst_ptr = (uint64_t)(intptr_t) hdr_dst_ptr;
|
||||
hdr->hdr_dst_size = hdr_dst_size;
|
||||
|
||||
#define MCA_PML_OB1_RDMA_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
/* copy segments */
|
||||
memcpy (hdr + 1, local_handle, local_handle_size);
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_RDMA_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
(h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \
|
||||
(h).hdr_dst_ptr = ntoh64((h).hdr_dst_ptr); \
|
||||
(h).hdr_dst_size = ntoh64((h).hdr_dst_size); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_OB1_RDMA_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_RDMA_HDR_FILL(h); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
#define MCA_PML_OB1_RDMA_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
(h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \
|
||||
(h).hdr_dst_ptr = hton64((h).hdr_dst_ptr); \
|
||||
(h).hdr_dst_size = hton64((h).hdr_dst_size); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
@ -331,31 +378,34 @@ struct mca_pml_ob1_fin_hdr_t {
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2];
|
||||
#endif
|
||||
uint32_t hdr_fail; /**< RDMA operation failed */
|
||||
opal_ptr_t hdr_des; /**< completed descriptor */
|
||||
int64_t hdr_size; /**< number of bytes completed (positive), error code (negative) */
|
||||
opal_ptr_t hdr_frag; /**< completed RDMA fragment */
|
||||
};
|
||||
typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_fin_hdr_prepare (mca_pml_ob1_fin_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint64_t hdr_frag, int64_t hdr_size)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_FIN, hdr_flags);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_FIN_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while (0)
|
||||
#else
|
||||
#define MCA_PML_OB1_FIN_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
#endif
|
||||
hdr->hdr_frag.lval = hdr_frag;
|
||||
hdr->hdr_size = hdr_size;
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_FIN_HDR_NTOH(h) \
|
||||
do { \
|
||||
#define MCA_PML_OB1_FIN_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_size = ntoh64((h).hdr_size); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_OB1_FIN_HDR_HTON(h) \
|
||||
do { \
|
||||
#define MCA_PML_OB1_FIN_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_FIN_HDR_FILL(h); \
|
||||
} while (0)
|
||||
(h).hdr_size = hton64((h).hdr_size); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Union of defined hdr types.
|
||||
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
@ -68,7 +68,6 @@ static inline int mca_pml_ob1_send_inline (void *buf, size_t count,
|
||||
ompi_proc_t *dst_proc, mca_bml_base_endpoint_t* endpoint,
|
||||
ompi_communicator_t * comm)
|
||||
{
|
||||
mca_btl_base_descriptor_t *des = NULL;
|
||||
mca_pml_ob1_match_hdr_t match;
|
||||
mca_bml_base_btl_t *bml_btl;
|
||||
opal_convertor_t convertor;
|
||||
@ -98,28 +97,21 @@ static inline int mca_pml_ob1_send_inline (void *buf, size_t count,
|
||||
size = 0;
|
||||
}
|
||||
|
||||
match.hdr_common.hdr_flags = 0;
|
||||
match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
match.hdr_ctx = comm->c_contextid;
|
||||
match.hdr_src = comm->c_my_rank;
|
||||
match.hdr_tag = tag;
|
||||
match.hdr_seq = seqn;
|
||||
mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
|
||||
comm->c_contextid, comm->c_my_rank,
|
||||
tag, seqn);
|
||||
|
||||
ob1_hdr_hton(&match, MCA_PML_OB1_HDR_TYPE_MATCH, dst_proc);
|
||||
|
||||
/* try to send immediately */
|
||||
rc = mca_bml_base_sendi (bml_btl, &convertor, &match, OMPI_PML_OB1_MATCH_HDR_LEN,
|
||||
size, MCA_BTL_NO_ORDER, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP,
|
||||
MCA_PML_OB1_HDR_TYPE_MATCH, &des);
|
||||
MCA_PML_OB1_HDR_TYPE_MATCH, NULL);
|
||||
if (count > 0) {
|
||||
opal_convertor_cleanup (&convertor);
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
if (des) {
|
||||
mca_bml_base_free (bml_btl, des);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -224,7 +216,7 @@ int mca_pml_ob1_send(void *buf,
|
||||
|
||||
OBJ_CONSTRUCT(sendreq, mca_pml_ob1_send_request_t);
|
||||
sendreq->req_send.req_base.req_proc = dst_proc;
|
||||
sendreq->src_des = NULL;
|
||||
sendreq->rdma_frag = NULL;
|
||||
|
||||
MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
|
||||
buf,
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -9,6 +10,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,11 +30,6 @@
|
||||
#include "pml_ob1.h"
|
||||
#include "pml_ob1_rdma.h"
|
||||
|
||||
/* Use this registration if no registration needed for a BTL instead of NULL.
|
||||
* This will help other code to distinguish case when memory is not registered
|
||||
* from case when registration is not needed */
|
||||
static mca_mpool_base_registration_t pml_ob1_dummy_reg;
|
||||
|
||||
/*
|
||||
* Check to see if memory is registered or can be registered. Build a
|
||||
* set of registrations on the request.
|
||||
@ -45,7 +43,7 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
{
|
||||
int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
|
||||
double weight_total = 0;
|
||||
int num_btls_used = 0, n;
|
||||
int num_btls_used = 0;
|
||||
|
||||
/* shortcut when there are no rdma capable btls */
|
||||
if(num_btls == 0) {
|
||||
@ -53,29 +51,33 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
}
|
||||
|
||||
/* check to see if memory is registered */
|
||||
for(n = 0; n < num_btls && num_btls_used < mca_pml_ob1.max_rdma_per_request;
|
||||
n++) {
|
||||
for (int n = 0; n < num_btls && num_btls_used < mca_pml_ob1.max_rdma_per_request; n++) {
|
||||
mca_bml_base_btl_t* bml_btl =
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
|
||||
(bml_endpoint->btl_rdma_index + n) % num_btls);
|
||||
mca_mpool_base_registration_t* reg = &pml_ob1_dummy_reg;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;
|
||||
(bml_endpoint->btl_rdma_index + n) % num_btls);
|
||||
mca_btl_base_registration_handle_t *reg_handle = NULL;
|
||||
mca_btl_base_module_t *btl = bml_btl->btl;
|
||||
|
||||
if( NULL != btl_mpool ) {
|
||||
if(!mca_pml_ob1.leave_pinned) {
|
||||
/* look through existing registrations */
|
||||
btl_mpool->mpool_find(btl_mpool, base, size, &reg);
|
||||
} else {
|
||||
/* register the memory */
|
||||
btl_mpool->mpool_register(btl_mpool, base, size, 0, &reg);
|
||||
if (btl->btl_register_mem) {
|
||||
/* do not use the RDMA protocol with this btl if 1) leave pinned is disabled,
|
||||
* 2) the btl supports put, and 3) the fragment is larger than the minimum
|
||||
* pipeline size specified by the BTL */
|
||||
if (!mca_pml_ob1.leave_pinned && (btl->btl_flags & MCA_BTL_FLAGS_PUT) &&
|
||||
size > btl->btl_min_rdma_pipeline_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if(NULL == reg)
|
||||
/* try to register the memory region with the btl */
|
||||
reg_handle = btl->btl_register_mem (btl, bml_btl->btl_endpoint, base,
|
||||
size, MCA_BTL_REG_FLAG_REMOTE_READ);
|
||||
if (NULL == reg_handle) {
|
||||
/* btl requires registration but the registration failed */
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} /* else no registration is needed with this btl */
|
||||
|
||||
rdma_btls[num_btls_used].bml_btl = bml_btl;
|
||||
rdma_btls[num_btls_used].btl_reg = reg;
|
||||
rdma_btls[num_btls_used].btl_reg = reg_handle;
|
||||
weight_total += bml_btl->btl_weight;
|
||||
num_btls_used++;
|
||||
}
|
||||
@ -83,7 +85,7 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
/* if we don't use leave_pinned and all BTLs that already have this memory
|
||||
* registered amount to less than half of available bandwidth - fall back to
|
||||
* pipeline protocol */
|
||||
if(0 == num_btls_used || (!mca_pml_ob1.leave_pinned && weight_total < 0.5))
|
||||
if (0 == num_btls_used || (!mca_pml_ob1.leave_pinned && weight_total < 0.5))
|
||||
return 0;
|
||||
|
||||
mca_pml_ob1_calc_weighted_length(rdma_btls, num_btls_used, size,
|
||||
@ -103,10 +105,7 @@ size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
|
||||
for(i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) {
|
||||
rdma_btls[i].bml_btl =
|
||||
mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
|
||||
if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool)
|
||||
rdma_btls[i].btl_reg = NULL;
|
||||
else
|
||||
rdma_btls[i].btl_reg = &pml_ob1_dummy_reg;
|
||||
rdma_btls[i].btl_reg = NULL;
|
||||
|
||||
weight_total += rdma_btls[i].bml_btl->btl_weight;
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -9,6 +10,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,9 +24,13 @@
|
||||
#include "pml_ob1.h"
|
||||
#include "pml_ob1_rdmafrag.h"
|
||||
|
||||
static void mca_pml_ob1_rdma_frag_constructor (mca_pml_ob1_rdma_frag_t *frag)
|
||||
{
|
||||
frag->local_handle = NULL;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_pml_ob1_rdma_frag_t,
|
||||
ompi_free_list_item_t,
|
||||
NULL,
|
||||
mca_pml_ob1_rdma_frag_constructor,
|
||||
NULL);
|
||||
|
@ -10,6 +10,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,38 +34,52 @@ typedef enum {
|
||||
MCA_PML_OB1_RDMA_GET
|
||||
} mca_pml_ob1_rdma_state_t;
|
||||
|
||||
struct mca_pml_ob1_rdma_frag_t;
|
||||
|
||||
typedef void (*mca_pml_ob1_rdma_frag_callback_t)(struct mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length);
|
||||
|
||||
/**
|
||||
* Used to keep track of local and remote RDMA operations.
|
||||
*/
|
||||
struct mca_pml_ob1_rdma_frag_t {
|
||||
ompi_free_list_item_t super;
|
||||
mca_bml_base_btl_t* rdma_bml;
|
||||
mca_bml_base_btl_t *rdma_bml;
|
||||
mca_pml_ob1_hdr_t rdma_hdr;
|
||||
mca_pml_ob1_rdma_state_t rdma_state;
|
||||
size_t rdma_length;
|
||||
uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS];
|
||||
void *rdma_req;
|
||||
struct mca_bml_base_endpoint_t* rdma_ep;
|
||||
opal_convertor_t convertor;
|
||||
mca_mpool_base_registration_t* reg;
|
||||
uint32_t retries;
|
||||
mca_pml_ob1_rdma_frag_callback_t cbfunc;
|
||||
|
||||
uint64_t rdma_offset;
|
||||
void *local_address;
|
||||
mca_btl_base_registration_handle_t *local_handle;
|
||||
|
||||
uint64_t remote_address;
|
||||
uint8_t remote_handle[MCA_BTL_REG_HANDLE_MAX_SIZE];
|
||||
};
|
||||
typedef struct mca_pml_ob1_rdma_frag_t mca_pml_ob1_rdma_frag_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_ob1_rdma_frag_t);
|
||||
|
||||
|
||||
#define MCA_PML_OB1_RDMA_FRAG_ALLOC(frag) \
|
||||
do { \
|
||||
ompi_free_list_item_t* item; \
|
||||
#define MCA_PML_OB1_RDMA_FRAG_ALLOC(frag) \
|
||||
do { \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_WAIT_MT(&mca_pml_ob1.rdma_frags, item); \
|
||||
frag = (mca_pml_ob1_rdma_frag_t*)item; \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_OB1_RDMA_FRAG_RETURN(frag) \
|
||||
do { \
|
||||
/* return fragment */ \
|
||||
OMPI_FREE_LIST_RETURN_MT(&mca_pml_ob1.rdma_frags, \
|
||||
(ompi_free_list_item_t*)frag); \
|
||||
frag = (mca_pml_ob1_rdma_frag_t*)item; \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_OB1_RDMA_FRAG_RETURN(frag) \
|
||||
do { \
|
||||
/* return fragment */ \
|
||||
if (frag->local_handle) { \
|
||||
mca_bml_base_deregister_mem (frag->rdma_bml, frag->local_handle); \
|
||||
frag->local_handle = NULL; \
|
||||
} \
|
||||
OMPI_FREE_LIST_RETURN_MT(&mca_pml_ob1.rdma_frags, \
|
||||
(ompi_free_list_item_t*)frag); \
|
||||
} while (0)
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -108,13 +108,13 @@ void mca_pml_ob1_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_btl_base_segment_t* segments = des->des_segments;
|
||||
mca_pml_ob1_match_hdr_t* hdr = (mca_pml_ob1_match_hdr_t*)segments->seg_addr.pval;
|
||||
ompi_communicator_t *comm_ptr;
|
||||
mca_pml_ob1_recv_request_t *match = NULL;
|
||||
mca_pml_ob1_comm_t *comm;
|
||||
mca_pml_ob1_comm_proc_t *proc;
|
||||
size_t num_segments = des->des_local_count;
|
||||
size_t num_segments = des->des_segment_count;
|
||||
size_t bytes_received = 0;
|
||||
|
||||
assert(num_segments <= MCA_BTL_DES_MAX_SEGMENTS);
|
||||
@ -256,7 +256,7 @@ void mca_pml_ob1_recv_frag_callback_rndv(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_btl_base_segment_t* segments = des->des_segments;
|
||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
|
||||
@ -264,7 +264,7 @@ void mca_pml_ob1_recv_frag_callback_rndv(mca_btl_base_module_t* btl,
|
||||
}
|
||||
ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_RNDV);
|
||||
mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,
|
||||
des->des_local_count, MCA_PML_OB1_HDR_TYPE_RNDV);
|
||||
des->des_segment_count, MCA_PML_OB1_HDR_TYPE_RNDV);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -273,7 +273,7 @@ void mca_pml_ob1_recv_frag_callback_rget(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_btl_base_segment_t* segments = des->des_segments;
|
||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
|
||||
@ -281,7 +281,7 @@ void mca_pml_ob1_recv_frag_callback_rget(mca_btl_base_module_t* btl,
|
||||
}
|
||||
ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_RGET);
|
||||
mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,
|
||||
des->des_local_count, MCA_PML_OB1_HDR_TYPE_RGET);
|
||||
des->des_segment_count, MCA_PML_OB1_HDR_TYPE_RGET);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -292,9 +292,10 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata )
|
||||
{
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_btl_base_segment_t* segments = des->des_segments;
|
||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_ob1_send_request_t* sendreq;
|
||||
size_t size;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
|
||||
return;
|
||||
@ -307,19 +308,25 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
/* if the request should be delivered entirely by copy in/out
|
||||
* then throttle sends */
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA) {
|
||||
if (NULL != sendreq->src_des) {
|
||||
/* release registered memory */
|
||||
mca_bml_base_free (sendreq->req_rdma[0].bml_btl, sendreq->src_des);
|
||||
sendreq->src_des = NULL;
|
||||
if (NULL != sendreq->rdma_frag) {
|
||||
if (NULL != sendreq->rdma_frag->local_handle) {
|
||||
mca_bml_base_deregister_mem (sendreq->req_rdma[0].bml_btl, sendreq->rdma_frag->local_handle);
|
||||
sendreq->rdma_frag->local_handle = NULL;
|
||||
}
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(sendreq->rdma_frag);
|
||||
sendreq->rdma_frag = NULL;
|
||||
}
|
||||
|
||||
sendreq->req_throttle_sends = true;
|
||||
}
|
||||
|
||||
mca_pml_ob1_send_request_copy_in_out(sendreq,
|
||||
hdr->hdr_ack.hdr_send_offset,
|
||||
sendreq->req_send.req_bytes_packed -
|
||||
hdr->hdr_ack.hdr_send_offset);
|
||||
|
||||
if (hdr->hdr_ack.hdr_send_size) {
|
||||
size = hdr->hdr_ack.hdr_send_size;
|
||||
} else {
|
||||
size = sendreq->req_send.req_bytes_packed - hdr->hdr_ack.hdr_send_offset;
|
||||
}
|
||||
|
||||
mca_pml_ob1_send_request_copy_in_out(sendreq, hdr->hdr_ack.hdr_send_offset, size);
|
||||
|
||||
if (sendreq->req_state != 0) {
|
||||
/* Typical receipt of an ACK message causes req_state to be
|
||||
@ -355,13 +362,14 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata ) {
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_btl_base_segment_t* segments = des->des_segments;
|
||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_ob1_recv_request_t* recvreq;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_FRAG);
|
||||
recvreq = (mca_pml_ob1_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
@ -372,7 +380,7 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
assert(btl->btl_flags & MCA_BTL_FLAGS_CUDA_COPY_ASYNC_RECV);
|
||||
|
||||
/* This will trigger the opal_convertor_pack to start asynchronous copy. */
|
||||
mca_pml_ob1_recv_request_frag_copy_start(recvreq,btl,segments,des->des_local_count,des);
|
||||
mca_pml_ob1_recv_request_frag_copy_start(recvreq,btl,segments,des->des_segment_count,des);
|
||||
|
||||
/* Let BTL know that it CANNOT free the frag */
|
||||
des->des_flags |= MCA_BTL_DES_FLAGS_CUDA_COPY_ASYNC;
|
||||
@ -380,7 +388,8 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
return;
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
mca_pml_ob1_recv_request_progress_frag(recvreq,btl,segments,des->des_local_count);
|
||||
|
||||
mca_pml_ob1_recv_request_progress_frag(recvreq,btl,segments,des->des_segment_count);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -390,7 +399,7 @@ void mca_pml_ob1_recv_frag_callback_put(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata ) {
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_btl_base_segment_t* segments = des->des_segments;
|
||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_ob1_send_request_t* sendreq;
|
||||
|
||||
@ -410,20 +419,17 @@ void mca_pml_ob1_recv_frag_callback_fin(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata ) {
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||
mca_btl_base_descriptor_t* rdma;
|
||||
mca_btl_base_segment_t* segments = des->des_segments;
|
||||
mca_pml_ob1_fin_hdr_t* hdr = (mca_pml_ob1_fin_hdr_t *) segments->seg_addr.pval;
|
||||
mca_pml_ob1_rdma_frag_t *frag;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_fin_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_FIN);
|
||||
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
|
||||
rdma->des_cbfunc(btl, NULL, rdma,
|
||||
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);
|
||||
|
||||
return;
|
||||
frag = (mca_pml_ob1_rdma_frag_t *) hdr->hdr_frag.pval;
|
||||
frag->cbfunc (frag, hdr->hdr_size);
|
||||
}
|
||||
|
||||
|
||||
@ -699,7 +705,7 @@ out_of_order_match:
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
if(OPAL_LIKELY(match)) {
|
||||
switch(type) {
|
||||
switch(type) {
|
||||
case MCA_PML_OB1_HDR_TYPE_MATCH:
|
||||
mca_pml_ob1_recv_request_progress_match(match, btl, segments, num_segments);
|
||||
break;
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 FUJITSU LIMITED. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -150,12 +150,17 @@ static void mca_pml_ob1_recv_request_construct(mca_pml_ob1_recv_request_t* reque
|
||||
request->req_recv.req_base.req_ompi.req_free = mca_pml_ob1_recv_request_free;
|
||||
request->req_recv.req_base.req_ompi.req_cancel = mca_pml_ob1_recv_request_cancel;
|
||||
request->req_rdma_cnt = 0;
|
||||
request->local_handle = NULL;
|
||||
OBJ_CONSTRUCT(&request->lock, opal_mutex_t);
|
||||
}
|
||||
|
||||
static void mca_pml_ob1_recv_request_destruct(mca_pml_ob1_recv_request_t* request)
|
||||
{
|
||||
OBJ_DESTRUCT(&request->lock);
|
||||
if (OPAL_UNLIKELY(request->local_handle)) {
|
||||
mca_bml_base_deregister_mem (request->rdma_bml, request->local_handle);
|
||||
request->local_handle = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
@ -183,31 +188,27 @@ static void mca_pml_ob1_recv_ctl_completion( mca_btl_base_module_t* btl,
|
||||
* Put operation has completed remotely - update request status
|
||||
*/
|
||||
|
||||
static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_size)
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_cbdata;
|
||||
size_t bytes_received = 0;
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
|
||||
if( OPAL_LIKELY(status == OMPI_SUCCESS) ) {
|
||||
bytes_received = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
|
||||
(void *) des->des_local,
|
||||
des->des_local_count, 0);
|
||||
}
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
|
||||
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
|
||||
/* check completion status */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
if(recv_request_pml_complete_check(recvreq) == false &&
|
||||
if (OPAL_LIKELY(0 < rdma_size)) {
|
||||
assert (rdma_size == frag->rdma_length);
|
||||
|
||||
/* check completion status */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, (size_t) rdma_size);
|
||||
if (recv_request_pml_complete_check(recvreq) == false &&
|
||||
recvreq->req_rdma_offset < recvreq->req_send_offset) {
|
||||
/* schedule additional rdma operations */
|
||||
mca_pml_ob1_recv_request_schedule(recvreq, bml_btl);
|
||||
/* schedule additional rdma operations */
|
||||
mca_pml_ob1_recv_request_schedule(recvreq, bml_btl);
|
||||
}
|
||||
}
|
||||
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
@ -218,7 +219,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
int mca_pml_ob1_recv_request_ack_send_btl(
|
||||
ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
|
||||
bool nordma)
|
||||
uint64_t size, bool nordma)
|
||||
{
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_pml_ob1_ack_hdr_t* ack;
|
||||
@ -234,12 +235,9 @@ int mca_pml_ob1_recv_request_ack_send_btl(
|
||||
}
|
||||
|
||||
/* fill out header */
|
||||
ack = (mca_pml_ob1_ack_hdr_t*)des->des_local->seg_addr.pval;
|
||||
ack->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK;
|
||||
ack->hdr_common.hdr_flags = nordma ? MCA_PML_OB1_HDR_FLAGS_NORDMA : 0;
|
||||
ack->hdr_src_req.lval = hdr_src_req;
|
||||
ack->hdr_dst_req.pval = hdr_dst_req;
|
||||
ack->hdr_send_offset = hdr_send_offset;
|
||||
ack = (mca_pml_ob1_ack_hdr_t*)des->des_segments->seg_addr.pval;
|
||||
mca_pml_ob1_ack_hdr_prepare (ack, nordma ? MCA_PML_OB1_HDR_FLAGS_NORDMA : 0,
|
||||
hdr_src_req, hdr_dst_req, hdr_send_offset, size);
|
||||
|
||||
ob1_hdr_hton(ack, MCA_PML_OB1_HDR_TYPE_ACK, proc);
|
||||
|
||||
@ -313,63 +311,99 @@ static int mca_pml_ob1_recv_request_ack(
|
||||
if(recvreq->req_send_offset == hdr->hdr_msg_length)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* let the schedule function know there is no need to set the ACK flag */
|
||||
recvreq->req_ack_sent = true;
|
||||
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval,
|
||||
recvreq, recvreq->req_send_offset,
|
||||
recvreq, recvreq->req_send_offset, 0,
|
||||
recvreq->req_send_offset == bytes_received);
|
||||
}
|
||||
|
||||
static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag);
|
||||
|
||||
static int mca_pml_ob1_recv_request_get_frag_failed (mca_pml_ob1_rdma_frag_t *frag, int rc)
|
||||
{
|
||||
mca_pml_ob1_recv_request_t *recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
ompi_proc_t *proc = (ompi_proc_t *) recvreq->req_recv.req_base.req_proc;
|
||||
|
||||
if (OMPI_ERR_NOT_AVAILABLE == rc) {
|
||||
/* get isn't supported for this transfer. tell peer to fallback on put */
|
||||
rc = mca_pml_ob1_recv_request_put_frag (frag);
|
||||
if (OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append (&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
if (++frag->retries < mca_pml_ob1.rdma_retries_limit &&
|
||||
OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* tell peer to fall back on send for this region */
|
||||
rc = mca_pml_ob1_recv_request_ack_send(proc, frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req.lval,
|
||||
recvreq, frag->rdma_offset, frag->rdma_length, false);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return resources used by the RDMA
|
||||
*/
|
||||
|
||||
static void mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* ep,
|
||||
void *local_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
void *context, void *cbdata, int status)
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = (mca_bml_base_btl_t *) context;
|
||||
mca_pml_ob1_rdma_frag_t *frag = (mca_pml_ob1_rdma_frag_t *) cbdata;
|
||||
mca_pml_ob1_recv_request_t *recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(status);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||
status = mca_pml_ob1_recv_request_get_frag_failed (frag, status);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(status);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
} else {
|
||||
/* is receive request complete */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
/* TODO: re-add order */
|
||||
mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc,
|
||||
bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag,
|
||||
frag->rdma_length, 0, 0);
|
||||
|
||||
recv_request_pml_complete_check(recvreq);
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
}
|
||||
|
||||
/* is receive request complete */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
if (recvreq->req_recv.req_bytes_packed <= recvreq->req_bytes_received) {
|
||||
mca_pml_ob1_send_fin(recvreq->req_recv.req_base.req_proc,
|
||||
bml_btl,
|
||||
frag->rdma_hdr.hdr_rget.hdr_des,
|
||||
des->order, 0);
|
||||
}
|
||||
|
||||
recv_request_pml_complete_check(recvreq);
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
|
||||
static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
|
||||
mca_btl_base_descriptor_t *dst) {
|
||||
static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag)
|
||||
{
|
||||
mca_pml_ob1_recv_request_t *recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
mca_btl_base_descriptor_t *ctl;
|
||||
mca_pml_ob1_rdma_hdr_t *hdr;
|
||||
size_t seg_size;
|
||||
size_t reg_size;
|
||||
int rc;
|
||||
|
||||
seg_size = bml_btl->btl->btl_seg_size * dst->des_local_count;
|
||||
reg_size = bml_btl->btl->btl_registration_handle_size;
|
||||
|
||||
/* prepare a descriptor for rdma control message */
|
||||
mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof (mca_pml_ob1_rdma_hdr_t) + seg_size,
|
||||
mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof (mca_pml_ob1_rdma_hdr_t) + reg_size,
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK | MCA_BTL_DES_FLAGS_SIGNAL);
|
||||
if (OPAL_UNLIKELY(NULL == ctl)) {
|
||||
@ -378,26 +412,19 @@ static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
|
||||
ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
|
||||
|
||||
/* fill in rdma header */
|
||||
hdr = (mca_pml_ob1_rdma_hdr_t *) ctl->des_local->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT;
|
||||
hdr->hdr_common.hdr_flags =
|
||||
(!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0;
|
||||
hdr = (mca_pml_ob1_rdma_hdr_t *) ctl->des_segments->seg_addr.pval;
|
||||
mca_pml_ob1_rdma_hdr_prepare (hdr, (!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0,
|
||||
recvreq->remote_req_send.lval, frag, recvreq, frag->rdma_offset,
|
||||
frag->local_address, frag->rdma_length, frag->local_handle,
|
||||
reg_size);
|
||||
|
||||
hdr->hdr_req = frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req;
|
||||
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
||||
hdr->hdr_des.pval = dst;
|
||||
hdr->hdr_recv_req.pval = recvreq;
|
||||
frag->cbfunc = mca_pml_ob1_put_completion;
|
||||
|
||||
hdr->hdr_seg_cnt = dst->des_local_count;
|
||||
recvreq->req_ack_sent = true;
|
||||
|
||||
/* copy segments */
|
||||
memcpy (hdr + 1, dst->des_local, seg_size);
|
||||
|
||||
dst->des_cbfunc = mca_pml_ob1_put_completion;
|
||||
dst->des_cbdata = recvreq;
|
||||
|
||||
if (!recvreq->req_ack_sent)
|
||||
recvreq->req_ack_sent = true;
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(recvreq->req_recv.req_base), size,
|
||||
PERUSE_RECV);
|
||||
|
||||
/* send rdma request to peer */
|
||||
rc = mca_bml_base_send (bml_btl, ctl, MCA_PML_OB1_HDR_TYPE_PUT);
|
||||
@ -412,71 +439,30 @@ static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
|
||||
/*
|
||||
*
|
||||
*/
|
||||
int mca_pml_ob1_recv_request_get_frag( mca_pml_ob1_rdma_frag_t* frag )
|
||||
int mca_pml_ob1_recv_request_get_frag (mca_pml_ob1_rdma_frag_t *frag)
|
||||
{
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)frag->rdma_req;
|
||||
mca_bml_base_btl_t* bml_btl = frag->rdma_bml;
|
||||
mca_btl_base_descriptor_t* descriptor;
|
||||
size_t save_size = frag->rdma_length;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
int rc;
|
||||
|
||||
/* prepare descriptor */
|
||||
mca_bml_base_prepare_dst( bml_btl,
|
||||
NULL,
|
||||
&recvreq->req_recv.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER,
|
||||
0,
|
||||
&frag->rdma_length,
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK |
|
||||
MCA_BTL_DES_FLAGS_GET,
|
||||
&descriptor );
|
||||
if( OPAL_UNLIKELY(NULL == descriptor) ) {
|
||||
if (frag->retries < mca_pml_ob1.rdma_retries_limit) {
|
||||
frag->rdma_length = save_size;
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
} else {
|
||||
ompi_proc_t *proc = (ompi_proc_t *) recvreq->req_recv.req_base.req_proc;
|
||||
|
||||
/* tell peer to fall back on send */
|
||||
recvreq->req_send_offset = 0;
|
||||
rc = mca_pml_ob1_recv_request_ack_send(proc, frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req.lval,
|
||||
recvreq, recvreq->req_send_offset, true);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
return rc;
|
||||
if (bml_btl->btl->btl_register_mem && !frag->local_handle) {
|
||||
mca_bml_base_register_mem (bml_btl, frag->local_address, frag->rdma_length, MCA_BTL_REG_FLAG_LOCAL_WRITE |
|
||||
MCA_BTL_REG_FLAG_REMOTE_WRITE, &frag->local_handle);
|
||||
if (OPAL_UNLIKELY(NULL == frag->local_handle)) {
|
||||
return mca_pml_ob1_recv_request_get_frag_failed (frag, OMPI_ERR_OUT_OF_RESOURCE);
|
||||
}
|
||||
}
|
||||
|
||||
descriptor->des_remote = (mca_btl_base_segment_t *) frag->rdma_segs;
|
||||
descriptor->des_remote_count = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
|
||||
descriptor->des_cbfunc = mca_pml_ob1_rget_completion;
|
||||
descriptor->des_cbdata = frag;
|
||||
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT(PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(recvreq->req_recv.req_base),
|
||||
&(((mca_pml_ob1_recv_request_t *) frag->rdma_req)->req_recv.req_base),
|
||||
frag->rdma_length, PERUSE_RECV);
|
||||
|
||||
/* queue up get request */
|
||||
rc = mca_bml_base_get(bml_btl,descriptor);
|
||||
rc = mca_bml_base_get (bml_btl, frag->local_address, frag->remote_address, frag->local_handle,
|
||||
(mca_btl_base_registration_handle_t *) frag->remote_handle, frag->rdma_length,
|
||||
0, MCA_BTL_NO_ORDER, mca_pml_ob1_rget_completion, frag);
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
||||
if (OPAL_UNLIKELY(OMPI_ERR_NOT_AVAILABLE == rc)) {
|
||||
/* get isn't supported for this transfer. tell peer to fallback on put */
|
||||
rc = mca_pml_ob1_init_get_fallback (frag, descriptor);
|
||||
}
|
||||
|
||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
mca_bml_base_free(bml_btl, descriptor);
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending,
|
||||
(opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
} else if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
OMPI_ERROR_LOG(rc);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
return mca_pml_ob1_recv_request_get_frag_failed (frag, OMPI_ERR_OUT_OF_RESOURCE);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -502,6 +488,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
|
||||
bytes_received = mca_pml_ob1_compute_segment_length_base (segments, num_segments,
|
||||
sizeof(mca_pml_ob1_frag_hdr_t));
|
||||
data_offset = hdr->hdr_frag.hdr_frag_offset;
|
||||
|
||||
/*
|
||||
* Make user buffer accessible(defined) before unpacking.
|
||||
*/
|
||||
@ -573,7 +560,7 @@ void mca_pml_ob1_recv_request_frag_copy_start( mca_pml_ob1_recv_request_t* recvr
|
||||
/* Store the receive request in unused context pointer. */
|
||||
des->des_context = (void *)recvreq;
|
||||
/* Store the amount of bytes in unused remote count value */
|
||||
des->des_remote_count = bytes_delivered;
|
||||
des->des_segment_count = bytes_delivered;
|
||||
/* Then record an event that will get triggered by a PML progress call which
|
||||
* checks the stream events. If we get an error, abort. Should get message
|
||||
* from CUDA code about what went wrong. */
|
||||
@ -598,7 +585,7 @@ void mca_pml_ob1_recv_request_frag_copy_finished( mca_btl_base_module_t* btl,
|
||||
int status )
|
||||
{
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_context;
|
||||
size_t bytes_received = des->des_remote_count;
|
||||
size_t bytes_received = des->des_segment_count;
|
||||
|
||||
OPAL_OUTPUT((-1, "frag_copy_finished (delivered=%d), frag=%p", (int)bytes_received, (void *)des));
|
||||
/* Call into the BTL so it can free the descriptor. At this point, it is
|
||||
@ -629,7 +616,6 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
mca_pml_ob1_rget_hdr_t* hdr = (mca_pml_ob1_rget_hdr_t*)segments->seg_addr.pval;
|
||||
mca_bml_base_endpoint_t* bml_endpoint = NULL;
|
||||
size_t bytes_remaining, prev_sent, offset;
|
||||
mca_btl_base_segment_t *r_segments;
|
||||
mca_pml_ob1_rdma_frag_t *frag;
|
||||
mca_bml_base_btl_t *rdma_bml;
|
||||
int rc;
|
||||
@ -637,6 +623,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
prev_sent = offset = 0;
|
||||
bytes_remaining = hdr->hdr_rndv.hdr_msg_length;
|
||||
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
|
||||
recvreq->req_send_offset = 0;
|
||||
|
||||
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match);
|
||||
|
||||
@ -680,8 +667,10 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
|
||||
bytes_remaining = mca_pml_ob1_compute_segment_length_remote (btl->btl_seg_size, (void *)(hdr + 1),
|
||||
hdr->hdr_seg_cnt, recvreq->req_recv.req_base.req_proc);
|
||||
bytes_remaining = hdr->hdr_rndv.hdr_msg_length;
|
||||
|
||||
/* save the request for put fallback */
|
||||
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
|
||||
|
||||
/* The while loop adds a fragmentation mechanism. The variable bytes_remaining holds the num
|
||||
* of bytes left to be sent. In each iteration we send the max possible bytes supported
|
||||
@ -690,7 +679,12 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
* the next iteration with the updated size.
|
||||
* Also - In each iteration we update the location in the buffer to be used for writing
|
||||
* the message, and the location to read from. This is done using the offset variable that
|
||||
* accumulates the number of bytes that were sent so far. */
|
||||
* accumulates the number of bytes that were sent so far.
|
||||
*
|
||||
* NTH: This fragmentation may go away if we change the btls to require them to handle
|
||||
* get fragmentation internally. This is a reasonable solution since some btls do not
|
||||
* need any fragmentation (sm, vader, self, etc). Remove this loop if this ends up
|
||||
* being the case. */
|
||||
while (bytes_remaining > 0) {
|
||||
/* allocate/initialize a fragment */
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
@ -700,29 +694,31 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
|
||||
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
|
||||
memcpy (frag->remote_handle, hdr + 1, btl->btl_registration_handle_size);
|
||||
|
||||
memcpy (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
|
||||
|
||||
/* update the read location -- NTH: note this will only work if there is exactly one
|
||||
segment. TODO -- make this work with multiple segments */
|
||||
r_segments = (mca_btl_base_segment_t *) frag->rdma_segs;
|
||||
r_segments->seg_addr.lval += offset;
|
||||
/* update the read location */
|
||||
frag->remote_address = hdr->hdr_src_ptr + offset;
|
||||
|
||||
/* updating the write location */
|
||||
OPAL_THREAD_LOCK(&recvreq->lock);
|
||||
opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor, &offset);
|
||||
opal_convertor_get_current_pointer (&recvreq->req_recv.req_base.req_convertor, &frag->local_address);
|
||||
OPAL_THREAD_UNLOCK(&recvreq->lock);
|
||||
|
||||
frag->rdma_bml = rdma_bml;
|
||||
|
||||
frag->rdma_hdr.hdr_rget = *hdr;
|
||||
frag->retries = 0;
|
||||
frag->rdma_req = recvreq;
|
||||
frag->rdma_ep = bml_endpoint;
|
||||
frag->rdma_state = MCA_PML_OB1_RDMA_GET;
|
||||
frag->reg = NULL;
|
||||
frag->rdma_length = bytes_remaining;
|
||||
frag->retries = 0;
|
||||
frag->rdma_req = recvreq;
|
||||
frag->rdma_state = MCA_PML_OB1_RDMA_GET;
|
||||
frag->local_handle = NULL;
|
||||
frag->rdma_offset = offset;
|
||||
|
||||
if (bytes_remaining > rdma_bml->btl->btl_get_limit) {
|
||||
frag->rdma_length = rdma_bml->btl->btl_get_limit;
|
||||
} else {
|
||||
frag->rdma_length = bytes_remaining;
|
||||
}
|
||||
|
||||
/* NTH: TODO -- handle error conditions gracefully */
|
||||
rc = mca_pml_ob1_recv_request_get_frag(frag);
|
||||
@ -921,13 +917,11 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
|
||||
|
||||
while(bytes_remaining > 0 &&
|
||||
recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) {
|
||||
size_t size, seg_size;
|
||||
mca_pml_ob1_rdma_hdr_t* hdr;
|
||||
mca_btl_base_descriptor_t* dst;
|
||||
mca_btl_base_descriptor_t* ctl;
|
||||
mca_mpool_base_registration_t * reg = NULL;
|
||||
mca_btl_base_module_t* btl;
|
||||
mca_pml_ob1_rdma_frag_t *frag = NULL;
|
||||
mca_btl_base_module_t *btl;
|
||||
int rc, rdma_idx;
|
||||
void *data_ptr;
|
||||
size_t size;
|
||||
|
||||
if(prev_bytes_remaining == bytes_remaining) {
|
||||
if(++num_fail == num_tries) {
|
||||
@ -948,86 +942,62 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
|
||||
do {
|
||||
rdma_idx = recvreq->req_rdma_idx;
|
||||
bml_btl = recvreq->req_rdma[rdma_idx].bml_btl;
|
||||
reg = recvreq->req_rdma[rdma_idx].btl_reg;
|
||||
size = recvreq->req_rdma[rdma_idx].length;
|
||||
if(++recvreq->req_rdma_idx >= recvreq->req_rdma_cnt)
|
||||
recvreq->req_rdma_idx = 0;
|
||||
} while(!size);
|
||||
btl = bml_btl->btl;
|
||||
|
||||
/* makes sure that we don't exceed BTL max rdma size
|
||||
* if memory is not pinned already */
|
||||
if( (NULL == reg) && (btl->btl_rdma_pipeline_frag_size != 0) &&
|
||||
(size > btl->btl_rdma_pipeline_frag_size)) {
|
||||
/* NTH: This conditional used to check if there was a registration in
|
||||
* recvreq->req_rdma[rdma_idx].btl_reg. If one existed it was due to
|
||||
* the btl not needing registration (equivalent to btl->btl_register_mem
|
||||
* != NULL). This new check is equivalent. Note: I feel this protocol
|
||||
* needs work to better improve resource usage when running with a
|
||||
* leave pinned protocol. */
|
||||
if (btl->btl_register_mem && (btl->btl_rdma_pipeline_frag_size != 0) &&
|
||||
(size > btl->btl_rdma_pipeline_frag_size)) {
|
||||
size = btl->btl_rdma_pipeline_frag_size;
|
||||
}
|
||||
|
||||
/* take lock to protect converter against concurrent access
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* take lock to protect convertor against concurrent access
|
||||
* from unpack */
|
||||
OPAL_THREAD_LOCK(&recvreq->lock);
|
||||
opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor,
|
||||
&recvreq->req_rdma_offset );
|
||||
|
||||
/* prepare a descriptor for RDMA */
|
||||
mca_bml_base_prepare_dst(bml_btl, reg,
|
||||
&recvreq->req_recv.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
|
||||
MCA_BTL_DES_FLAGS_PUT, &dst);
|
||||
opal_convertor_set_position (&recvreq->req_recv.req_base.req_convertor,
|
||||
&recvreq->req_rdma_offset);
|
||||
opal_convertor_get_current_pointer (&recvreq->req_recv.req_base.req_convertor, &data_ptr);
|
||||
OPAL_THREAD_UNLOCK(&recvreq->lock);
|
||||
|
||||
if(OPAL_UNLIKELY(dst == NULL)) {
|
||||
continue;
|
||||
if (btl->btl_register_mem) {
|
||||
mca_bml_base_register_mem (bml_btl, data_ptr, size, MCA_BTL_REG_FLAG_REMOTE_WRITE,
|
||||
&frag->local_handle);
|
||||
if (OPAL_UNLIKELY(NULL == frag->local_handle)) {
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
dst->des_cbfunc = mca_pml_ob1_put_completion;
|
||||
dst->des_cbdata = recvreq;
|
||||
/* fill in the minimum information needed to handle the fin message */
|
||||
frag->cbfunc = mca_pml_ob1_put_completion;
|
||||
frag->rdma_length = size;
|
||||
frag->rdma_req = recvreq;
|
||||
frag->rdma_bml = bml_btl;
|
||||
frag->local_address = data_ptr;
|
||||
frag->rdma_offset = recvreq->req_rdma_offset;
|
||||
|
||||
seg_size = btl->btl_seg_size * dst->des_local_count;
|
||||
|
||||
/* prepare a descriptor for rdma control message */
|
||||
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rdma_hdr_t) + seg_size,
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK | MCA_BTL_DES_FLAGS_SIGNAL);
|
||||
|
||||
if( OPAL_UNLIKELY(NULL == ctl) ) {
|
||||
mca_bml_base_free(bml_btl,dst);
|
||||
continue;
|
||||
}
|
||||
ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
|
||||
|
||||
/* fill in rdma header */
|
||||
hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_local->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT;
|
||||
hdr->hdr_common.hdr_flags =
|
||||
(!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0;
|
||||
hdr->hdr_req = recvreq->remote_req_send;
|
||||
hdr->hdr_des.pval = dst;
|
||||
hdr->hdr_recv_req.pval = recvreq;
|
||||
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
||||
hdr->hdr_seg_cnt = dst->des_local_count;
|
||||
|
||||
/* copy segments */
|
||||
memmove (hdr + 1, dst->des_local, seg_size);
|
||||
|
||||
if(!recvreq->req_ack_sent)
|
||||
recvreq->req_ack_sent = true;
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_PUT, recvreq->req_recv.req_base.req_proc);
|
||||
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(recvreq->req_recv.req_base), size,
|
||||
PERUSE_RECV);
|
||||
|
||||
/* send rdma request to peer */
|
||||
rc = mca_bml_base_send(bml_btl, ctl, MCA_PML_OB1_HDR_TYPE_PUT);
|
||||
if( OPAL_LIKELY( rc >= 0 ) ) {
|
||||
rc = mca_pml_ob1_recv_request_put_frag (frag);
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
|
||||
/* update request state */
|
||||
recvreq->req_rdma_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
recvreq->req_rdma[rdma_idx].length -= size;
|
||||
bytes_remaining -= size;
|
||||
} else {
|
||||
mca_bml_base_free(bml_btl,ctl);
|
||||
mca_bml_base_free(bml_btl,dst);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -131,7 +132,7 @@ do { \
|
||||
#define MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq) \
|
||||
{ \
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \
|
||||
OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_recv_requests, \
|
||||
OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_recv_requests, \
|
||||
(ompi_free_list_item_t*)(recvreq)); \
|
||||
}
|
||||
|
||||
@ -154,9 +155,11 @@ recv_request_pml_complete(mca_pml_ob1_recv_request_t *recvreq)
|
||||
}
|
||||
|
||||
for(i = 0; i < recvreq->req_rdma_cnt; i++) {
|
||||
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[i].btl_reg;
|
||||
if( NULL != btl_reg && btl_reg->mpool != NULL) {
|
||||
btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg );
|
||||
struct mca_btl_base_registration_handle_t *handle = recvreq->req_rdma[i].btl_reg;
|
||||
mca_bml_base_btl_t *bml_btl = recvreq->req_rdma[i].bml_btl;
|
||||
|
||||
if (NULL != handle) {
|
||||
mca_bml_base_deregister_mem (bml_btl, handle);
|
||||
}
|
||||
}
|
||||
recvreq->req_rdma_cnt = 0;
|
||||
@ -178,6 +181,10 @@ recv_request_pml_complete(mca_pml_ob1_recv_request_t *recvreq)
|
||||
recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR =
|
||||
MPI_ERR_TRUNCATE;
|
||||
}
|
||||
if (OPAL_UNLIKELY(recvreq->local_handle)) {
|
||||
mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle);
|
||||
recvreq->local_handle = NULL;
|
||||
}
|
||||
MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(recvreq);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||
@ -387,7 +394,7 @@ static inline void mca_pml_ob1_recv_request_schedule(
|
||||
(void)mca_pml_ob1_recv_request_schedule_exclusive(req, start_bml_btl);
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_ADD_ACK_TO_PENDING(P, S, D, O) \
|
||||
#define MCA_PML_OB1_ADD_ACK_TO_PENDING(P, S, D, O, Sz) \
|
||||
do { \
|
||||
mca_pml_ob1_pckt_pending_t *_pckt; \
|
||||
\
|
||||
@ -396,6 +403,7 @@ static inline void mca_pml_ob1_recv_request_schedule(
|
||||
_pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \
|
||||
_pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \
|
||||
_pckt->hdr.hdr_ack.hdr_send_offset = (O); \
|
||||
_pckt->hdr.hdr_ack.hdr_send_size = (Sz); \
|
||||
_pckt->proc = (P); \
|
||||
_pckt->bml_btl = NULL; \
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock); \
|
||||
@ -406,11 +414,11 @@ static inline void mca_pml_ob1_recv_request_schedule(
|
||||
|
||||
int mca_pml_ob1_recv_request_ack_send_btl(ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
|
||||
uint64_t hdr_rdma_offset, bool nordma);
|
||||
uint64_t hdr_rdma_offset, uint64_t size, bool nordma);
|
||||
|
||||
static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
|
||||
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
|
||||
bool nordma)
|
||||
uint64_t size, bool nordma)
|
||||
{
|
||||
size_t i;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
@ -420,12 +428,12 @@ static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
|
||||
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
if(mca_pml_ob1_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
|
||||
hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS)
|
||||
hdr_dst_req, hdr_send_offset, size, nordma) == OMPI_SUCCESS)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
MCA_PML_OB1_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,
|
||||
hdr_send_offset);
|
||||
hdr_send_offset, size);
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -137,6 +137,7 @@ static void mca_pml_ob1_send_request_construct(mca_pml_ob1_send_request_t* req)
|
||||
req->req_send.req_base.req_ompi.req_cancel = mca_pml_ob1_send_request_cancel;
|
||||
req->req_rdma_cnt = 0;
|
||||
req->req_throttle_sends = false;
|
||||
req->rdma_frag = NULL;
|
||||
OBJ_CONSTRUCT(&req->req_send_ranges, opal_list_t);
|
||||
OBJ_CONSTRUCT(&req->req_send_range_lock, opal_mutex_t);
|
||||
}
|
||||
@ -145,6 +146,10 @@ static void mca_pml_ob1_send_request_destruct(mca_pml_ob1_send_request_t* req)
|
||||
{
|
||||
OBJ_DESTRUCT(&req->req_send_ranges);
|
||||
OBJ_DESTRUCT(&req->req_send_range_lock);
|
||||
if (req->rdma_frag) {
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(req->rdma_frag);
|
||||
req->rdma_frag = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_pml_ob1_send_request_t,
|
||||
@ -236,10 +241,9 @@ mca_pml_ob1_rndv_completion( mca_btl_base_module_t* btl,
|
||||
* happens in one thread, the increase of the req_bytes_delivered does not
|
||||
* have to be atomic.
|
||||
*/
|
||||
req_bytes_delivered = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
|
||||
(void *) des->des_local,
|
||||
des->des_local_count,
|
||||
sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
||||
req_bytes_delivered = mca_pml_ob1_compute_segment_length_base ((void *) des->des_segments,
|
||||
des->des_segment_count,
|
||||
sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
||||
|
||||
mca_pml_ob1_rndv_completion_request( bml_btl, sendreq, req_bytes_delivered );
|
||||
}
|
||||
@ -250,27 +254,18 @@ mca_pml_ob1_rndv_completion( mca_btl_base_module_t* btl,
|
||||
*/
|
||||
|
||||
static void
|
||||
mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length)
|
||||
{
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
size_t req_bytes_delivered;
|
||||
mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
|
||||
/* count bytes of user data actually delivered and check for request completion */
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == status)) {
|
||||
req_bytes_delivered = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
|
||||
(void *) des->des_local,
|
||||
des->des_local_count, 0);
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
if (OPAL_LIKELY(0 < rdma_length)) {
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
|
||||
}
|
||||
sendreq->src_des = NULL;
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
/* free the descriptor */
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
@ -314,10 +309,9 @@ mca_pml_ob1_frag_completion( mca_btl_base_module_t* btl,
|
||||
}
|
||||
|
||||
/* count bytes of user data actually delivered */
|
||||
req_bytes_delivered = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
|
||||
(void *) des->des_local,
|
||||
des->des_local_count,
|
||||
sizeof(mca_pml_ob1_frag_hdr_t));
|
||||
req_bytes_delivered = mca_pml_ob1_compute_segment_length_base ((void *) des->des_segments,
|
||||
des->des_segment_count,
|
||||
sizeof(mca_pml_ob1_frag_hdr_t));
|
||||
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth, -1);
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
@ -389,7 +383,7 @@ int mca_pml_ob1_send_request_start_buffered(
|
||||
if( OPAL_UNLIKELY(NULL == des) ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
segment = des->des_local;
|
||||
segment = des->des_segments;
|
||||
|
||||
/* pack the data into the BTL supplied buffer */
|
||||
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)segment->seg_addr.pval +
|
||||
@ -408,17 +402,14 @@ int mca_pml_ob1_send_request_start_buffered(
|
||||
|
||||
/* build rendezvous header */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, 0,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_send.req_bytes_packed, sendreq);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* update lengths */
|
||||
segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data;
|
||||
@ -491,15 +482,13 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
if(NULL != bml_btl->btl->btl_sendi) {
|
||||
mca_pml_ob1_match_hdr_t match;
|
||||
match.hdr_common.hdr_flags = 0;
|
||||
match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence);
|
||||
|
||||
ob1_hdr_hton(&match, MCA_PML_OB1_HDR_TYPE_MATCH,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton (&match, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* try to send immediately */
|
||||
rc = mca_bml_base_sendi( bml_btl, &sendreq->req_send.req_base.req_convertor,
|
||||
@ -532,7 +521,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq,
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
segment = des->des_local;
|
||||
segment = des->des_segments;
|
||||
|
||||
if(size > 0) {
|
||||
/* pack the data into the supplied buffer */
|
||||
@ -566,15 +555,13 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
/* build match header */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* update lengths */
|
||||
segment->seg_len = OMPI_PML_OB1_MATCH_HDR_LEN + max_data;
|
||||
@ -618,7 +605,6 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
/* prepare descriptor */
|
||||
mca_bml_base_prepare_src( bml_btl,
|
||||
NULL,
|
||||
&sendreq->req_send.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER,
|
||||
OMPI_PML_OB1_MATCH_HDR_LEN,
|
||||
@ -628,19 +614,17 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq,
|
||||
if( OPAL_UNLIKELY(NULL == des) ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
segment = des->des_local;
|
||||
segment = des->des_segments;
|
||||
|
||||
/* build match header */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* short message */
|
||||
des->des_cbfunc = mca_pml_ob1_match_completion_free;
|
||||
@ -674,80 +658,68 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq,
|
||||
* one RDMA capable BTLs). This way round robin distribution of RDMA
|
||||
* operation is achieved.
|
||||
*/
|
||||
|
||||
mca_btl_base_descriptor_t *des, *src = NULL;
|
||||
mca_btl_base_registration_handle_t *local_handle;
|
||||
mca_btl_base_descriptor_t *des;
|
||||
mca_pml_ob1_rdma_frag_t *frag;
|
||||
mca_pml_ob1_rget_hdr_t *hdr;
|
||||
size_t seg_size;
|
||||
size_t reg_size;
|
||||
void *data_ptr;
|
||||
int rc;
|
||||
|
||||
sendreq->src_des = NULL;
|
||||
|
||||
bml_btl = sendreq->req_rdma[0].bml_btl;
|
||||
if (!(bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
|
||||
sendreq->rdma_frag = NULL;
|
||||
/* This BTL does not support get. Use rendezvous to start the RDMA operation using put instead. */
|
||||
return mca_pml_ob1_send_request_start_rndv (sendreq, bml_btl, 0, MCA_PML_OB1_HDR_FLAGS_CONTIG |
|
||||
MCA_PML_OB1_HDR_FLAGS_PIN);
|
||||
}
|
||||
|
||||
MEMCHECKER(
|
||||
memchecker_call(&opal_memchecker_base_mem_defined,
|
||||
sendreq->req_send.req_base.req_addr,
|
||||
sendreq->req_send.req_base.req_count,
|
||||
sendreq->req_send.req_base.req_datatype);
|
||||
);
|
||||
/* prepare source descriptor/segment(s) */
|
||||
/* PML owns this descriptor and will free it in */
|
||||
/* mca_pml_ob1_rget_completion */
|
||||
mca_bml_base_prepare_src( bml_btl, sendreq->req_rdma[0].btl_reg,
|
||||
&sendreq->req_send.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_GET |
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, &src );
|
||||
MEMCHECKER(
|
||||
memchecker_call(&opal_memchecker_base_mem_noaccess,
|
||||
sendreq->req_send.req_base.req_addr,
|
||||
sendreq->req_send.req_base.req_count,
|
||||
sendreq->req_send.req_base.req_datatype);
|
||||
);
|
||||
if( OPAL_UNLIKELY(NULL == src) ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
/* at this time ob1 does not support non-contiguous gets. the convertor represents a
|
||||
* contiguous block of memory */
|
||||
opal_convertor_get_current_pointer (&sendreq->req_send.req_base.req_convertor, &data_ptr);
|
||||
|
||||
local_handle = sendreq->req_rdma[0].btl_reg;
|
||||
|
||||
/* allocate an rdma fragment to keep track of the request size for use in the fin message */
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
src->des_cbfunc = mca_pml_ob1_rget_completion;
|
||||
src->des_cbdata = sendreq;
|
||||
|
||||
sendreq->src_des = src;
|
||||
/* fill in necessary fragment data */
|
||||
frag->rdma_req = sendreq;
|
||||
frag->rdma_bml = bml_btl;
|
||||
frag->rdma_length = size;
|
||||
frag->cbfunc = mca_pml_ob1_rget_completion;
|
||||
/* do not store the local handle in the fragment. it will be released by mca_pml_ob1_free_rdma_resources */
|
||||
|
||||
seg_size = bml_btl->btl->btl_seg_size * src->des_local_count;
|
||||
/* save the fragment for get->put fallback */
|
||||
sendreq->rdma_frag = frag;
|
||||
|
||||
reg_size = bml_btl->btl->btl_registration_handle_size;
|
||||
|
||||
/* allocate space for get hdr + segment list */
|
||||
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + seg_size,
|
||||
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + reg_size,
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
|
||||
MCA_BTL_DES_FLAGS_SIGNAL);
|
||||
if( OPAL_UNLIKELY(NULL == des) ) {
|
||||
/* NTH: no need to reset the converter here. it will be reset before it is retried */
|
||||
mca_bml_base_free(bml_btl, src);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* build match header */
|
||||
hdr = (mca_pml_ob1_rget_hdr_t *) des->des_local->seg_addr.pval;
|
||||
|
||||
hdr->hdr_rndv.hdr_match.hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
|
||||
hdr->hdr_rndv.hdr_match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
|
||||
hdr->hdr_rndv.hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_rndv.hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_rndv.hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_rndv.hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
hdr->hdr_des.pval = src;
|
||||
hdr->hdr_seg_cnt = src->des_local_count;
|
||||
hdr = (mca_pml_ob1_rget_hdr_t *) des->des_segments->seg_addr.pval;
|
||||
/* TODO -- Add support for multiple segments for get */
|
||||
mca_pml_ob1_rget_hdr_prepare (hdr, MCA_PML_OB1_HDR_FLAGS_CONTIG | MCA_PML_OB1_HDR_FLAGS_PIN,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_send.req_bytes_packed, sendreq,
|
||||
frag, data_ptr, local_handle, reg_size);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RGET, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* copy segment data */
|
||||
memcpy (hdr + 1, src->des_local, seg_size);
|
||||
|
||||
des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
|
||||
des->des_cbdata = sendreq;
|
||||
|
||||
@ -765,12 +737,6 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq,
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RGET);
|
||||
if (OPAL_UNLIKELY(rc < 0)) {
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
|
||||
if (sendreq->src_des) {
|
||||
mca_bml_base_free (bml_btl, sendreq->src_des);
|
||||
sendreq->src_des = NULL;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -808,7 +774,6 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq,
|
||||
sendreq->req_send.req_base.req_datatype);
|
||||
);
|
||||
mca_bml_base_prepare_src( bml_btl,
|
||||
NULL,
|
||||
&sendreq->req_send.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER,
|
||||
sizeof(mca_pml_ob1_rendezvous_hdr_t),
|
||||
@ -827,21 +792,19 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq,
|
||||
if( OPAL_UNLIKELY(NULL == des) ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
segment = des->des_local;
|
||||
segment = des->des_segments;
|
||||
|
||||
/* build hdr */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = flags | MCA_PML_OB1_HDR_FLAGS_SIGNAL;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, flags |
|
||||
MCA_PML_OB1_HDR_FLAGS_SIGNAL,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_send.req_bytes_packed, sendreq);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* first fragment of a long message */
|
||||
des->des_cbdata = sendreq;
|
||||
@ -1022,13 +985,10 @@ cannot_pack:
|
||||
sendreq->req_send.req_base.req_count,
|
||||
sendreq->req_send.req_base.req_datatype);
|
||||
);
|
||||
mca_bml_base_prepare_src(bml_btl, NULL,
|
||||
&sendreq->req_send.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER,
|
||||
sizeof(mca_pml_ob1_frag_hdr_t),
|
||||
mca_bml_base_prepare_src(bml_btl, &sendreq->req_send.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_frag_hdr_t),
|
||||
&size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK |
|
||||
MCA_BTL_DES_FLAGS_SIGNAL,
|
||||
&des);
|
||||
MCA_BTL_DES_FLAGS_SIGNAL, &des);
|
||||
MEMCHECKER(
|
||||
memchecker_call(&opal_memchecker_base_mem_noaccess,
|
||||
sendreq->req_send.req_base.req_addr,
|
||||
@ -1051,12 +1011,9 @@ cannot_pack:
|
||||
des->des_cbdata = sendreq;
|
||||
|
||||
/* setup header */
|
||||
hdr = (mca_pml_ob1_frag_hdr_t*)des->des_local->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FRAG;
|
||||
hdr->hdr_frag_offset = range->range_send_offset;
|
||||
hdr->hdr_src_req.pval = sendreq;
|
||||
hdr->hdr_dst_req = sendreq->req_recv;
|
||||
hdr = (mca_pml_ob1_frag_hdr_t*)des->des_segments->seg_addr.pval;
|
||||
mca_pml_ob1_frag_hdr_prepare (hdr, 0, range->range_send_offset, sendreq,
|
||||
sendreq->req_recv.lval);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_FRAG,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
@ -1113,38 +1070,66 @@ cannot_pack:
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A put fragment could not be started. Queue the fragment to be retried later or
|
||||
* fall back on send/recv.
|
||||
*
|
||||
* @param frag  RDMA fragment whose put failed. frag->rdma_req is used as the send
|
||||
*              request and frag->rdma_bml as the BTL for the FIN message.
|
||||
* @param rc    Error code from the failed start. Only OMPI_ERR_OUT_OF_RESOURCE is
|
||||
*              retried; any other value takes the fallback path immediately.
|
||||
*
|
||||
* Note: frag->retries is incremented on every call, whatever rc is.
|
||||
*/
|
||||
static void mca_pml_ob1_send_request_put_frag_failed (mca_pml_ob1_rdma_frag_t *frag, int rc)
|
||||
{
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
|
||||
if (++frag->retries < mca_pml_ob1.rdma_retries_limit && OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
/* resource error and still under the retry limit: park the frag on the rdma_pending list (under mca_pml_ob1.lock) for later */
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
} else {
|
||||
/* retry limit reached or a non-resource error: tell receiver to deregister memory */
|
||||
mca_pml_ob1_send_fin (sendreq->req_send.req_base.req_proc, bml_btl,
|
||||
frag->rdma_hdr.hdr_rdma.hdr_frag, 0, MCA_BTL_NO_ORDER,
|
||||
OPAL_ERR_TEMP_OUT_OF_RESOURCE);
|
||||
|
||||
/* send this fragment's range (hdr_rdma_offset, rdma_length) by copy in/out instead */
|
||||
mca_pml_ob1_send_request_copy_in_out(sendreq, frag->rdma_hdr.hdr_rdma.hdr_rdma_offset,
|
||||
frag->rdma_length);
|
||||
/* if a pointer to a receive request is not set it means that
|
||||
* ACK was not yet received. Don't schedule sends before ACK */
|
||||
if (NULL != sendreq->req_recv.pval)
|
||||
mca_pml_ob1_send_request_schedule (sendreq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An RDMA put operation has completed:
|
||||
* (1) Update request status and if required set completed
|
||||
* (2) Send FIN control message to the destination
|
||||
* (2) Send FIN control message to the destination
|
||||
*/
|
||||
|
||||
static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* ep,
|
||||
void *local_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
void *context, void *cbdata, int status)
|
||||
{
|
||||
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)frag->rdma_req;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||
mca_pml_ob1_rdma_frag_t *frag = (mca_pml_ob1_rdma_frag_t *) cbdata;
|
||||
mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = (mca_bml_base_btl_t *) context;
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(status);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS == status) ) {
|
||||
/* TODO -- readd ordering */
|
||||
mca_pml_ob1_send_fin (sendreq->req_send.req_base.req_proc, bml_btl,
|
||||
frag->rdma_hdr.hdr_rdma.hdr_frag, frag->rdma_length,
|
||||
0, 0);
|
||||
|
||||
/* check for request completion */
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
} else {
|
||||
/* try to fall back on send/recv */
|
||||
mca_pml_ob1_send_request_put_frag_failed (frag, status);
|
||||
}
|
||||
|
||||
mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc,
|
||||
bml_btl,
|
||||
frag->rdma_hdr.hdr_rdma.hdr_des,
|
||||
des->order, 0);
|
||||
|
||||
/* check for request completion */
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
@ -1152,81 +1137,45 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
|
||||
int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag )
|
||||
{
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)frag->rdma_req;
|
||||
mca_mpool_base_registration_t *reg = NULL;
|
||||
mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
|
||||
mca_btl_base_registration_handle_t *local_handle = NULL;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
mca_btl_base_descriptor_t *des;
|
||||
size_t save_size = frag->rdma_length;
|
||||
int rc;
|
||||
|
||||
if (OPAL_LIKELY(NULL == sendreq->src_des)) {
|
||||
/* setup descriptor */
|
||||
mca_bml_base_prepare_src( bml_btl,
|
||||
reg,
|
||||
&frag->convertor,
|
||||
MCA_BTL_NO_ORDER,
|
||||
0,
|
||||
&frag->rdma_length,
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
|
||||
MCA_BTL_DES_FLAGS_PUT,
|
||||
&des );
|
||||
|
||||
if( OPAL_UNLIKELY(NULL == des) ) {
|
||||
if(frag->retries < mca_pml_ob1.rdma_retries_limit) {
|
||||
size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
|
||||
frag->rdma_length = save_size;
|
||||
opal_convertor_set_position(&frag->convertor, &offset);
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
} else {
|
||||
mca_pml_ob1_send_request_t *sendreq =
|
||||
(mca_pml_ob1_send_request_t*)frag->rdma_req;
|
||||
if (bml_btl->btl->btl_register_mem && NULL == frag->local_handle) {
|
||||
/* Check if the segment is already registered */
|
||||
for (size_t i = 0 ; i < sendreq->req_rdma_cnt ; ++i) {
|
||||
if (sendreq->req_rdma[i].bml_btl == frag->rdma_bml) {
|
||||
/* do not copy the handle to the fragment to avoid deregistering it twice */
|
||||
local_handle = sendreq->req_rdma[i].btl_reg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* tell receiver to unregister memory */
|
||||
mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc,
|
||||
bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des,
|
||||
MCA_BTL_NO_ORDER, 1);
|
||||
if (NULL == frag->local_handle) {
|
||||
/* Not already registered. Register the region with the BTL. */
|
||||
mca_bml_base_register_mem (bml_btl, frag->local_address, frag->rdma_length, 0,
|
||||
&frag->local_handle);
|
||||
|
||||
/* send fragment by copy in/out */
|
||||
mca_pml_ob1_send_request_copy_in_out(sendreq,
|
||||
frag->rdma_hdr.hdr_rdma.hdr_rdma_offset, frag->rdma_length);
|
||||
/* if a pointer to a receive request is not set it means that
|
||||
* ACK was not yet received. Don't schedule sends before ACK */
|
||||
if(NULL != sendreq->req_recv.pval)
|
||||
mca_pml_ob1_send_request_schedule(sendreq);
|
||||
if (OPAL_UNLIKELY(NULL == frag->local_handle)) {
|
||||
mca_pml_ob1_send_request_put_frag_failed (frag, OMPI_ERR_OUT_OF_RESOURCE);
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
local_handle = frag->local_handle;
|
||||
}
|
||||
} else {
|
||||
/* already have a source descriptor */
|
||||
des = sendreq->src_des;
|
||||
sendreq->src_des = NULL;
|
||||
}
|
||||
|
||||
des->des_remote = (mca_btl_base_segment_t *) frag->rdma_segs;
|
||||
des->des_remote_count = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
|
||||
des->des_cbfunc = mca_pml_ob1_put_completion;
|
||||
des->des_cbdata = frag;
|
||||
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(((mca_pml_ob1_send_request_t*)frag->rdma_req)->req_send.req_base), save_size, PERUSE_SEND );
|
||||
|
||||
rc = mca_bml_base_put(bml_btl, des);
|
||||
rc = mca_bml_base_put (bml_btl, frag->local_address, frag->remote_address, local_handle,
|
||||
(mca_btl_base_registration_handle_t *) frag->remote_handle, frag->rdma_length,
|
||||
0, MCA_BTL_NO_ORDER, mca_pml_ob1_put_completion, frag);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
frag->rdma_length = save_size;
|
||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
} else {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(rc);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
mca_pml_ob1_send_request_put_frag_failed (frag, rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -1240,12 +1189,11 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag )
|
||||
*/
|
||||
|
||||
void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_ob1_rdma_hdr_t* hdr )
|
||||
{
|
||||
mca_bml_base_endpoint_t *bml_endpoint = sendreq->req_endpoint;
|
||||
mca_pml_ob1_rdma_frag_t* frag;
|
||||
size_t i, size = 0;
|
||||
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) {
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
@ -1253,61 +1201,36 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
sendreq->req_recv.pval = hdr->hdr_recv_req.pval;
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
if (NULL == sendreq->rdma_frag) {
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
|
||||
if( OPAL_UNLIKELY(NULL == frag) ) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
|
||||
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
|
||||
|
||||
/* setup fragment */
|
||||
memcpy (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
|
||||
|
||||
for( i = 0; i < hdr->hdr_seg_cnt; i++ ) {
|
||||
mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *) ((uintptr_t)(frag->rdma_segs) + i * btl->btl_seg_size);
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
if ((sendreq->req_send.req_base.req_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
|
||||
(ompi_proc_local()->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
|
||||
size += opal_swap_bytes4(seg->seg_len);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
size += seg->seg_len;
|
||||
if( OPAL_UNLIKELY(NULL == frag) ) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
} else {
|
||||
/* rget fallback on put */
|
||||
frag = sendreq->rdma_frag;
|
||||
sendreq->rdma_frag = NULL;
|
||||
sendreq->req_state = 0;
|
||||
}
|
||||
|
||||
/* copy registration data */
|
||||
memcpy (frag->remote_handle, hdr + 1, btl->btl_registration_handle_size);
|
||||
|
||||
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||
frag->rdma_hdr.hdr_rdma = *hdr;
|
||||
frag->rdma_req = sendreq;
|
||||
frag->rdma_ep = bml_endpoint;
|
||||
frag->rdma_length = size;
|
||||
frag->rdma_length = hdr->hdr_dst_size;
|
||||
frag->rdma_state = MCA_PML_OB1_RDMA_PUT;
|
||||
frag->reg = NULL;
|
||||
frag->remote_address = hdr->hdr_dst_ptr;
|
||||
frag->retries = 0;
|
||||
|
||||
if (OPAL_UNLIKELY(NULL != sendreq->src_des)) {
|
||||
/* get fallback path */
|
||||
sendreq->req_state = 0;
|
||||
}
|
||||
|
||||
/* lookup the corresponding registration */
|
||||
for(i=0; i<sendreq->req_rdma_cnt; i++) {
|
||||
if(sendreq->req_rdma[i].bml_btl == frag->rdma_bml) {
|
||||
frag->reg = sendreq->req_rdma[i].btl_reg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* RDMA writes may proceed in parallel to send and to each other, so
|
||||
* create clone of the convertor for each RDMA fragment
|
||||
*/
|
||||
size = hdr->hdr_rdma_offset;
|
||||
opal_convertor_clone_with_position(&sendreq->req_send.req_base.req_convertor,
|
||||
&frag->convertor, 0, &size);
|
||||
/* Get the address of the current offset. Note: at this time ob1 CAN NOT handle
|
||||
* non-contiguous RDMA. If that changes this code will be wrong. */
|
||||
opal_convertor_get_offset_pointer (&sendreq->req_send.req_base.req_convertor,
|
||||
hdr->hdr_rdma_offset, &frag->local_address);
|
||||
|
||||
mca_pml_ob1_send_request_put_frag(frag);
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -54,7 +54,7 @@ struct mca_pml_ob1_send_request_t {
|
||||
mca_pml_ob1_send_pending_t req_pending;
|
||||
opal_mutex_t req_send_range_lock;
|
||||
opal_list_t req_send_ranges;
|
||||
mca_btl_base_descriptor_t *src_des;
|
||||
mca_pml_ob1_rdma_frag_t *rdma_frag;
|
||||
mca_pml_ob1_com_btl_t req_rdma[1];
|
||||
};
|
||||
typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t;
|
||||
@ -124,10 +124,9 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type)
|
||||
ompi_free_list_item_t* item; \
|
||||
\
|
||||
if( OPAL_LIKELY(NULL != proc) ) { \
|
||||
OMPI_FREE_LIST_WAIT_MT(&mca_pml_base_send_requests, item); \
|
||||
OMPI_FREE_LIST_WAIT_MT(&mca_pml_base_send_requests, item); \
|
||||
sendreq = (mca_pml_ob1_send_request_t*)item; \
|
||||
sendreq->req_send.req_base.req_proc = proc; \
|
||||
sendreq->src_des = NULL; \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -163,15 +162,18 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type)
|
||||
assert( 0 == _position ); \
|
||||
}
|
||||
|
||||
static inline void mca_pml_ob1_free_rdma_resources(mca_pml_ob1_send_request_t* sendreq)
|
||||
static inline void mca_pml_ob1_free_rdma_resources (mca_pml_ob1_send_request_t* sendreq)
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* return mpool resources */
|
||||
for(r = 0; r < sendreq->req_rdma_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg;
|
||||
if( NULL != reg && reg->mpool != NULL ) {
|
||||
reg->mpool->mpool_deregister(reg->mpool, reg);
|
||||
struct mca_btl_base_registration_handle_t *handle = sendreq->req_rdma[r].btl_reg;
|
||||
mca_bml_base_btl_t *bml_btl = sendreq->req_rdma[r].bml_btl;
|
||||
|
||||
if (NULL != handle) {
|
||||
mca_bml_base_deregister_mem (bml_btl, handle);
|
||||
sendreq->req_rdma[r].btl_reg = NULL;
|
||||
}
|
||||
}
|
||||
sendreq->req_rdma_cnt = 0;
|
||||
@ -218,10 +220,14 @@ do {
|
||||
|
||||
/*
 * Release a send request.
 *
 * Finalizes the base send request, returns the attached RDMA fragment (if
 * any) and clears the pointer, then hands the request back to the
 * mca_pml_base_send_requests free list. The fragment is released before the
 * request is returned so the request is never touched after it has gone back
 * to the free list.
 *
 * NOTE: the argument is evaluated more than once; pass an expression without
 * side effects.
 */
#define MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq)                        \
    do {                                                                \
        /* Let the base handle the reference counts */                  \
        MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send));         \
        if (NULL != (sendreq)->rdma_frag) {                             \
            MCA_PML_OB1_RDMA_FRAG_RETURN ((sendreq)->rdma_frag);        \
            (sendreq)->rdma_frag = NULL;                                \
        }                                                               \
        OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_send_requests,          \
                                  (ompi_free_list_item_t*)(sendreq));   \
    } while(0)
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user