Update pml for btl changes
Этот коммит содержится в:
родитель
66bd698eaf
Коммит
b75bb8aea7
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2006-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved
|
||||
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -500,17 +500,17 @@ static void mca_pml_ob1_dump_hdr(mca_pml_ob1_hdr_t* hdr)
|
||||
case MCA_PML_OB1_HDR_TYPE_RGET:
|
||||
type = "RGET";
|
||||
snprintf( header, 128, "ctx %5d src %d tag %d seq %d msg_length %" PRIu64
|
||||
"seg_cnt %d hdr_des %" PRIu64,
|
||||
"seg_cnt %d frag %" PRIu64 " src_ptr %" PRIu64,
|
||||
hdr->hdr_rndv.hdr_match.hdr_ctx, hdr->hdr_rndv.hdr_match.hdr_src,
|
||||
hdr->hdr_rndv.hdr_match.hdr_tag, hdr->hdr_rndv.hdr_match.hdr_seq,
|
||||
hdr->hdr_rndv.hdr_msg_length,
|
||||
hdr->hdr_rget.hdr_seg_cnt, hdr->hdr_rget.hdr_des.lval);
|
||||
hdr->hdr_rndv.hdr_msg_length, hdr->hdr_rget.hdr_frag.lval,
|
||||
hdr->hdr_rget.hdr_src_ptr);
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_ACK:
|
||||
type = "ACK";
|
||||
snprintf( header, 128, "src_req %p dst_req %p offset %" PRIu64,
|
||||
snprintf( header, 128, "src_req %p dst_req %p offset %" PRIu64 " size %" PRIu64,
|
||||
hdr->hdr_ack.hdr_src_req.pval, hdr->hdr_ack.hdr_dst_req.pval,
|
||||
hdr->hdr_ack.hdr_send_offset);
|
||||
hdr->hdr_ack.hdr_send_offset, hdr->hdr_ack.hdr_send_size);
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_FRAG:
|
||||
type = "FRAG";
|
||||
@ -520,10 +520,11 @@ static void mca_pml_ob1_dump_hdr(mca_pml_ob1_hdr_t* hdr)
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_PUT:
|
||||
type = "PUT";
|
||||
snprintf( header, 128, "seg_cnt %d dst_req %p src_des %p recv_req %p offset %" PRIu64 " [%p %" PRIu64 "]",
|
||||
hdr->hdr_rdma.hdr_seg_cnt, hdr->hdr_rdma.hdr_req.pval, hdr->hdr_rdma.hdr_des.pval,
|
||||
snprintf( header, 128, "seg_cnt %d dst_req %p src_frag %p recv_req %p offset %" PRIu64
|
||||
" dst_ptr %" PRIu64 " dst_size %" PRIu64,
|
||||
hdr->hdr_rdma.hdr_req.pval, hdr->hdr_rdma.hdr_frag.pval,
|
||||
hdr->hdr_rdma.hdr_recv_req.pval, hdr->hdr_rdma.hdr_rdma_offset,
|
||||
hdr->hdr_rdma.hdr_segs[0].seg_addr.pval, hdr->hdr_rdma.hdr_segs[0].seg_len);
|
||||
hdr->hdr_rdma.hdr_dst_ptr, hdr->hdr_rdma.hdr_dst_size);
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_FIN:
|
||||
type = "FIN";
|
||||
@ -638,7 +639,8 @@ static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl,
|
||||
*/
|
||||
int mca_pml_ob1_send_fin( ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
opal_ptr_t hdr_des,
|
||||
opal_ptr_t hdr_frag,
|
||||
uint64_t rdma_size,
|
||||
uint8_t order,
|
||||
uint32_t status )
|
||||
{
|
||||
@ -650,18 +652,15 @@ int mca_pml_ob1_send_fin( ompi_proc_t* proc,
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
|
||||
if(NULL == fin) {
|
||||
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
|
||||
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
fin->des_cbfunc = mca_pml_ob1_fin_completion;
|
||||
fin->des_cbdata = NULL;
|
||||
|
||||
/* fill in header */
|
||||
hdr = (mca_pml_ob1_fin_hdr_t*)fin->des_local->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN;
|
||||
hdr->hdr_des = hdr_des;
|
||||
hdr->hdr_fail = status;
|
||||
mca_pml_ob1_fin_hdr_prepare ((mca_pml_ob1_fin_hdr_t *) fin->des_local->seg_addr.pval,
|
||||
0, hdr_frag.lval, status ? status : rdma_size);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_FIN, proc);
|
||||
|
||||
@ -676,7 +675,7 @@ int mca_pml_ob1_send_fin( ompi_proc_t* proc,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
mca_bml_base_free(bml_btl, fin);
|
||||
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_des, bml_btl, order, status);
|
||||
MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
@ -717,6 +716,7 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
||||
pckt->hdr.hdr_ack.hdr_src_req.lval,
|
||||
pckt->hdr.hdr_ack.hdr_dst_req.pval,
|
||||
pckt->hdr.hdr_ack.hdr_send_offset,
|
||||
pckt->hdr.hdr_ack.hdr_send_size,
|
||||
pckt->hdr.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA);
|
||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
@ -728,9 +728,10 @@ void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
|
||||
break;
|
||||
case MCA_PML_OB1_HDR_TYPE_FIN:
|
||||
rc = mca_pml_ob1_send_fin(pckt->proc, send_dst,
|
||||
pckt->hdr.hdr_fin.hdr_des,
|
||||
pckt->hdr.hdr_fin.hdr_frag,
|
||||
pckt->hdr.hdr_fin.hdr_size,
|
||||
pckt->order,
|
||||
pckt->hdr.hdr_fin.hdr_fail);
|
||||
pckt->status);
|
||||
if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
|
||||
return;
|
||||
}
|
||||
|
@ -216,6 +216,7 @@ struct mca_pml_ob1_pckt_pending_t {
|
||||
mca_pml_ob1_hdr_t hdr;
|
||||
struct mca_bml_base_btl_t *bml_btl;
|
||||
uint8_t order;
|
||||
int status;
|
||||
};
|
||||
typedef struct mca_pml_ob1_pckt_pending_t mca_pml_ob1_pckt_pending_t;
|
||||
OBJ_CLASS_DECLARATION(mca_pml_ob1_pckt_pending_t);
|
||||
@ -234,17 +235,17 @@ do { \
|
||||
(ompi_free_list_item_t*)pckt); \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, B, O, S) \
|
||||
#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, Sz, B, O, S) \
|
||||
do { \
|
||||
mca_pml_ob1_pckt_pending_t *_pckt; \
|
||||
\
|
||||
MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt); \
|
||||
_pckt->hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN; \
|
||||
_pckt->hdr.hdr_fin.hdr_des = (D); \
|
||||
_pckt->hdr.hdr_fin.hdr_fail = (S); \
|
||||
mca_pml_ob1_fin_hdr_prepare (&_pckt->hdr.hdr_fin, 0, \
|
||||
(D).lval, (Sz)); \
|
||||
_pckt->proc = (P); \
|
||||
_pckt->bml_btl = (B); \
|
||||
_pckt->order = (O); \
|
||||
_pckt->status = (S); \
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock); \
|
||||
opal_list_append(&mca_pml_ob1.pckt_pending, \
|
||||
(opal_list_item_t*)_pckt); \
|
||||
@ -253,7 +254,7 @@ do { \
|
||||
|
||||
|
||||
int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||
opal_ptr_t hdr_des, uint8_t order, uint32_t status);
|
||||
opal_ptr_t hdr_frag, uint64_t size, uint8_t order, uint32_t status);
|
||||
|
||||
/* This function tries to resend FIN/ACK packets from pckt_pending queue.
|
||||
* Packets are added to the queue when sending of FIN or ACK is failed due to
|
||||
@ -338,7 +339,7 @@ mca_pml_ob1_compute_segment_length_remote (size_t seg_size, void *segments,
|
||||
/* represent BTL chosen for sending request */
|
||||
struct mca_pml_ob1_com_btl_t {
|
||||
mca_bml_base_btl_t *bml_btl;
|
||||
struct mca_mpool_base_registration_t* btl_reg;
|
||||
struct mca_btl_base_registration_handle_t *btl_reg;
|
||||
size_t length;
|
||||
};
|
||||
typedef struct mca_pml_ob1_com_btl_t mca_pml_ob1_com_btl_t;
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -63,6 +63,13 @@ struct mca_pml_ob1_common_hdr_t {
|
||||
};
|
||||
typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_common_hdr_prepare (mca_pml_ob1_common_hdr_t *hdr, uint8_t hdr_type,
|
||||
uint8_t hdr_flags)
|
||||
{
|
||||
hdr->hdr_type = hdr_type;
|
||||
hdr->hdr_flags = hdr_flags;
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_COMMON_HDR_NTOH(h)
|
||||
#define MCA_PML_OB1_COMMON_HDR_HTON(h)
|
||||
|
||||
@ -88,15 +95,19 @@ struct mca_pml_ob1_match_hdr_t {
|
||||
|
||||
typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_match_hdr_prepare (mca_pml_ob1_match_hdr_t *hdr, uint8_t hdr_type, uint8_t hdr_flags,
|
||||
uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, hdr_type, hdr_flags);
|
||||
hdr->hdr_ctx = hdr_ctx;
|
||||
hdr->hdr_src = hdr_src;
|
||||
hdr->hdr_tag = hdr_tag;
|
||||
hdr->hdr_seq = hdr_seq;
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_MATCH_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_OB1_MATCH_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_MATCH_HDR_NTOH(h) \
|
||||
do { \
|
||||
@ -110,7 +121,6 @@ do { \
|
||||
#define MCA_PML_OB1_MATCH_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_MATCH_HDR_FILL(h); \
|
||||
(h).hdr_ctx = htons((h).hdr_ctx); \
|
||||
(h).hdr_src = htonl((h).hdr_src); \
|
||||
(h).hdr_tag = htonl((h).hdr_tag); \
|
||||
@ -129,12 +139,14 @@ struct mca_pml_ob1_rendezvous_hdr_t {
|
||||
};
|
||||
typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_RNDV_HDR_FILL(h) \
|
||||
MCA_PML_OB1_MATCH_HDR_FILL((h).hdr_match)
|
||||
#else
|
||||
#define MCA_PML_OB1_RNDV_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
static inline void mca_pml_ob1_rendezvous_hdr_prepare (mca_pml_ob1_rendezvous_hdr_t *hdr, uint8_t hdr_type, uint8_t hdr_flags,
|
||||
uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq,
|
||||
uint64_t hdr_msg_length, void *hdr_src_req)
|
||||
{
|
||||
mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, hdr_type, hdr_flags, hdr_ctx, hdr_src, hdr_tag, hdr_seq);
|
||||
hdr->hdr_msg_length = hdr_msg_length;
|
||||
hdr->hdr_src_req.pval = hdr_src_req;
|
||||
}
|
||||
|
||||
/* Note that hdr_src_req is not put in network byte order because it
|
||||
is never processed by the receiver, other than being copied into
|
||||
@ -148,7 +160,6 @@ typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||
#define MCA_PML_OB1_RNDV_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_MATCH_HDR_HTON((h).hdr_match); \
|
||||
MCA_PML_OB1_RNDV_HDR_FILL(h); \
|
||||
(h).hdr_msg_length = hton64((h).hdr_msg_length); \
|
||||
} while (0)
|
||||
|
||||
@ -157,38 +168,47 @@ typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||
*/
|
||||
struct mca_pml_ob1_rget_hdr_t {
|
||||
mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[4];
|
||||
#endif
|
||||
opal_ptr_t hdr_des; /**< source descriptor */
|
||||
opal_ptr_t hdr_frag; /**< source fragment (for fin) */
|
||||
uint64_t hdr_src_ptr; /**< source pointer */
|
||||
/* btl registration handle data follows */
|
||||
};
|
||||
typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_rget_hdr_prepare (mca_pml_ob1_rget_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint16_t hdr_ctx, int32_t hdr_src, int32_t hdr_tag, uint16_t hdr_seq,
|
||||
uint64_t hdr_msg_length, void *hdr_src_req, void *hdr_frag,
|
||||
void *hdr_src_ptr, void *local_handle, size_t local_handle_size)
|
||||
{
|
||||
mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RGET, hdr_flags,
|
||||
hdr_ctx, hdr_src, hdr_tag, hdr_seq, hdr_msg_length, hdr_src_req);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_RGET_HDR_FILL(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_FILL((h).hdr_rndv); \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_OB1_RGET_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
hdr->hdr_padding[2] = 0;
|
||||
hdr->hdr_padding[3] = 0;
|
||||
#endif
|
||||
hdr->hdr_frag.pval = hdr_frag;
|
||||
hdr->hdr_src_ptr = (uint64_t)(intptr_t) hdr_src_ptr;
|
||||
|
||||
#define MCA_PML_OB1_RGET_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_NTOH((h).hdr_rndv); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
/* copy registration handle */
|
||||
memcpy (hdr + 1, local_handle, local_handle_size);
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_RGET_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_NTOH((h).hdr_rndv); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
(h).hdr_src_ptr = ntoh64((h).hdr_src_ptr); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_OB1_RGET_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_HTON((h).hdr_rndv); \
|
||||
MCA_PML_OB1_RGET_HDR_FILL(h); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
#define MCA_PML_OB1_RGET_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_RNDV_HDR_HTON((h).hdr_rndv); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
(h).hdr_src_ptr = hton64((h).hdr_src_ptr); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
@ -205,19 +225,23 @@ struct mca_pml_ob1_frag_hdr_t {
|
||||
};
|
||||
typedef struct mca_pml_ob1_frag_hdr_t mca_pml_ob1_frag_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_frag_hdr_prepare (mca_pml_ob1_frag_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint64_t hdr_frag_offset, void *hdr_src_req,
|
||||
uint64_t hdr_dst_req)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_FRAG, hdr_flags);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_FRAG_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
(h).hdr_padding[4] = 0; \
|
||||
(h).hdr_padding[5] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_OB1_FRAG_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
hdr->hdr_padding[2] = 0;
|
||||
hdr->hdr_padding[3] = 0;
|
||||
hdr->hdr_padding[4] = 0;
|
||||
hdr->hdr_padding[5] = 0;
|
||||
#endif
|
||||
hdr->hdr_frag_offset = hdr_frag_offset;
|
||||
hdr->hdr_src_req.pval = hdr_src_req;
|
||||
hdr->hdr_dst_req.lval = hdr_dst_req;
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_FRAG_HDR_NTOH(h) \
|
||||
do { \
|
||||
@ -228,7 +252,6 @@ do { \
|
||||
#define MCA_PML_OB1_FRAG_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_FRAG_HDR_FILL(h); \
|
||||
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
|
||||
} while (0)
|
||||
|
||||
@ -244,38 +267,45 @@ struct mca_pml_ob1_ack_hdr_t {
|
||||
opal_ptr_t hdr_src_req; /**< source request */
|
||||
opal_ptr_t hdr_dst_req; /**< matched receive request */
|
||||
uint64_t hdr_send_offset; /**< starting point of copy in/out */
|
||||
uint64_t hdr_send_size; /**< number of bytes requested (0: all remaining) */
|
||||
};
|
||||
typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_ack_hdr_prepare (mca_pml_ob1_ack_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint64_t hdr_src_req, void *hdr_dst_req,
|
||||
uint64_t hdr_send_offset, uint64_t hdr_send_size)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_ACK, hdr_flags);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_ACK_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
(h).hdr_padding[4] = 0; \
|
||||
(h).hdr_padding[5] = 0; \
|
||||
} while (0)
|
||||
#else
|
||||
#define MCA_PML_OB1_ACK_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
hdr->hdr_padding[2] = 0;
|
||||
hdr->hdr_padding[3] = 0;
|
||||
hdr->hdr_padding[4] = 0;
|
||||
hdr->hdr_padding[5] = 0;
|
||||
#endif
|
||||
hdr->hdr_src_req.lval = hdr_src_req;
|
||||
hdr->hdr_dst_req.pval = hdr_dst_req;
|
||||
hdr->hdr_send_offset = hdr_send_offset;
|
||||
hdr->hdr_send_size = hdr_send_size;
|
||||
}
|
||||
|
||||
/* Note that the request headers are not put in NBO because the
|
||||
src_req is already in receiver's byte order and the dst_req is not
|
||||
used by the receiver for anything other than backpointers in return
|
||||
headers */
|
||||
#define MCA_PML_OB1_ACK_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
#define MCA_PML_OB1_ACK_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_send_offset = ntoh64((h).hdr_send_offset); \
|
||||
(h).hdr_send_size = ntoh64((h).hdr_send_size); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_OB1_ACK_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_ACK_HDR_FILL(h); \
|
||||
#define MCA_PML_OB1_ACK_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
(h).hdr_send_offset = hton64((h).hdr_send_offset); \
|
||||
(h).hdr_send_size = hton64((h).hdr_send_size); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
@ -287,38 +317,55 @@ struct mca_pml_ob1_rdma_hdr_t {
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */
|
||||
#endif
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
/* TODO: add real support for multiple destination segments */
|
||||
opal_ptr_t hdr_req; /**< destination request */
|
||||
opal_ptr_t hdr_des; /**< source descriptor */
|
||||
opal_ptr_t hdr_frag; /**< receiver fragment */
|
||||
opal_ptr_t hdr_recv_req; /**< receive request (NTH: needed for put fallback on send) */
|
||||
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
||||
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
||||
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
||||
uint64_t hdr_dst_ptr; /**< destination address */
|
||||
uint64_t hdr_dst_size; /**< destination size */
|
||||
/* registration data follows */
|
||||
};
|
||||
typedef struct mca_pml_ob1_rdma_hdr_t mca_pml_ob1_rdma_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_rdma_hdr_prepare (mca_pml_ob1_rdma_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint64_t hdr_req, void *hdr_frag, void *hdr_recv_req,
|
||||
uint64_t hdr_rdma_offset, void *hdr_dst_ptr,
|
||||
uint64_t hdr_dst_size, void *local_handle,
|
||||
size_t local_handle_size)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_PUT, hdr_flags);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_RDMA_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_OB1_RDMA_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
#endif
|
||||
hdr->hdr_req.lval = hdr_req;
|
||||
hdr->hdr_frag.pval = hdr_frag;
|
||||
hdr->hdr_recv_req.pval = hdr_recv_req;
|
||||
hdr->hdr_rdma_offset = hdr_rdma_offset;
|
||||
hdr->hdr_dst_ptr = (uint64_t)(intptr_t) hdr_dst_ptr;
|
||||
hdr->hdr_dst_size = hdr_dst_size;
|
||||
|
||||
#define MCA_PML_OB1_RDMA_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
/* copy segments */
|
||||
memcpy (hdr + 1, local_handle, local_handle_size);
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_RDMA_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = ntohl((h).hdr_seg_cnt); \
|
||||
(h).hdr_rdma_offset = ntoh64((h).hdr_rdma_offset); \
|
||||
(h).hdr_dst_ptr = ntoh64((h).hdr_dst_ptr); \
|
||||
(h).hdr_dst_size = ntoh64((h).hdr_dst_size); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_OB1_RDMA_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_RDMA_HDR_FILL(h); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
#define MCA_PML_OB1_RDMA_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
(h).hdr_seg_cnt = htonl((h).hdr_seg_cnt); \
|
||||
(h).hdr_rdma_offset = hton64((h).hdr_rdma_offset); \
|
||||
(h).hdr_dst_ptr = hton64((h).hdr_dst_ptr); \
|
||||
(h).hdr_dst_size = hton64((h).hdr_dst_size); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
@ -330,31 +377,34 @@ struct mca_pml_ob1_fin_hdr_t {
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2];
|
||||
#endif
|
||||
uint32_t hdr_fail; /**< RDMA operation failed */
|
||||
opal_ptr_t hdr_des; /**< completed descriptor */
|
||||
int64_t hdr_size; /**< number of bytes completed (positive), error code (negative) */
|
||||
opal_ptr_t hdr_frag; /**< completed RDMA fragment */
|
||||
};
|
||||
typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t;
|
||||
|
||||
static inline void mca_pml_ob1_fin_hdr_prepare (mca_pml_ob1_fin_hdr_t *hdr, uint8_t hdr_flags,
|
||||
uint64_t hdr_frag, int64_t hdr_size)
|
||||
{
|
||||
mca_pml_ob1_common_hdr_prepare (&hdr->hdr_common, MCA_PML_OB1_HDR_TYPE_FIN, hdr_flags);
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG
|
||||
#define MCA_PML_OB1_FIN_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
} while (0)
|
||||
#else
|
||||
#define MCA_PML_OB1_FIN_HDR_FILL(h)
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT && OPAL_ENABLE_DEBUG */
|
||||
hdr->hdr_padding[0] = 0;
|
||||
hdr->hdr_padding[1] = 0;
|
||||
#endif
|
||||
hdr->hdr_frag.lval = hdr_frag;
|
||||
hdr->hdr_size = hdr_size;
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_FIN_HDR_NTOH(h) \
|
||||
do { \
|
||||
#define MCA_PML_OB1_FIN_HDR_NTOH(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||
(h).hdr_size = ntoh64((h).hdr_size); \
|
||||
} while (0)
|
||||
|
||||
#define MCA_PML_OB1_FIN_HDR_HTON(h) \
|
||||
do { \
|
||||
#define MCA_PML_OB1_FIN_HDR_HTON(h) \
|
||||
do { \
|
||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||
MCA_PML_OB1_FIN_HDR_FILL(h); \
|
||||
} while (0)
|
||||
(h).hdr_size = hton64((h).hdr_size); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* Union of defined hdr types.
|
||||
|
@ -94,12 +94,9 @@ static inline int mca_pml_ob1_send_inline (void *buf, size_t count,
|
||||
opal_convertor_get_packed_size (&convertor, &size);
|
||||
}
|
||||
|
||||
match.hdr_common.hdr_flags = 0;
|
||||
match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
match.hdr_ctx = comm->c_contextid;
|
||||
match.hdr_src = comm->c_my_rank;
|
||||
match.hdr_tag = tag;
|
||||
match.hdr_seq = seqn;
|
||||
mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
|
||||
comm->c_contextid, comm->c_my_rank,
|
||||
tag, seqn);
|
||||
|
||||
ob1_hdr_hton(&match, MCA_PML_OB1_HDR_TYPE_MATCH, dst_proc);
|
||||
|
||||
@ -220,7 +217,7 @@ int mca_pml_ob1_send(void *buf,
|
||||
|
||||
OBJ_CONSTRUCT(sendreq, mca_pml_ob1_send_request_t);
|
||||
sendreq->req_send.req_base.req_proc = dst_proc;
|
||||
sendreq->src_des = NULL;
|
||||
sendreq->rdma_frag = NULL;
|
||||
|
||||
MCA_PML_OB1_SEND_REQUEST_INIT(sendreq,
|
||||
buf,
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -9,6 +10,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,11 +30,6 @@
|
||||
#include "pml_ob1.h"
|
||||
#include "pml_ob1_rdma.h"
|
||||
|
||||
/* Use this registration if no registration needed for a BTL instead of NULL.
|
||||
* This will help other code to distinguish case when memory is not registered
|
||||
* from case when registration is not needed */
|
||||
static mca_mpool_base_registration_t pml_ob1_dummy_reg;
|
||||
|
||||
/*
|
||||
* Check to see if memory is registered or can be registered. Build a
|
||||
* set of registrations on the request.
|
||||
@ -45,7 +43,7 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
{
|
||||
int num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
|
||||
double weight_total = 0;
|
||||
int num_btls_used = 0, n;
|
||||
int num_btls_used = 0;
|
||||
|
||||
/* shortcut when there are no rdma capable btls */
|
||||
if(num_btls == 0) {
|
||||
@ -53,29 +51,25 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
}
|
||||
|
||||
/* check to see if memory is registered */
|
||||
for(n = 0; n < num_btls && num_btls_used < mca_pml_ob1.max_rdma_per_request;
|
||||
n++) {
|
||||
for (int n = 0; n < num_btls && num_btls_used < mca_pml_ob1.max_rdma_per_request; n++) {
|
||||
mca_bml_base_btl_t* bml_btl =
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
|
||||
(bml_endpoint->btl_rdma_index + n) % num_btls);
|
||||
mca_mpool_base_registration_t* reg = &pml_ob1_dummy_reg;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;
|
||||
(bml_endpoint->btl_rdma_index + n) % num_btls);
|
||||
mca_btl_base_registration_handle_t *reg_handle = NULL;
|
||||
mca_btl_base_module_t *btl = bml_btl->btl;
|
||||
|
||||
if( NULL != btl_mpool ) {
|
||||
if(!mca_pml_ob1.leave_pinned) {
|
||||
/* look through existing registrations */
|
||||
btl_mpool->mpool_find(btl_mpool, base, size, ®);
|
||||
} else {
|
||||
/* register the memory */
|
||||
btl_mpool->mpool_register(btl_mpool, base, size, 0, ®);
|
||||
}
|
||||
|
||||
if(NULL == reg)
|
||||
if (btl->btl_register_mem) {
|
||||
/* try to register the memory with the btl */
|
||||
reg_handle = btl->btl_register_mem (btl, bml_btl->btl_endpoint, base,
|
||||
size, MCA_BTL_REG_FLAG_REMOTE_READ);
|
||||
if (NULL == reg_handle) {
|
||||
/* btl requires registration but the registration failed */
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} /* else no registration is needed */
|
||||
|
||||
rdma_btls[num_btls_used].bml_btl = bml_btl;
|
||||
rdma_btls[num_btls_used].btl_reg = reg;
|
||||
rdma_btls[num_btls_used].btl_reg = reg_handle;
|
||||
weight_total += bml_btl->btl_weight;
|
||||
num_btls_used++;
|
||||
}
|
||||
@ -83,7 +77,7 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
/* if we don't use leave_pinned and all BTLs that already have this memory
|
||||
* registered amount to less then half of available bandwidth - fall back to
|
||||
* pipeline protocol */
|
||||
if(0 == num_btls_used || (!mca_pml_ob1.leave_pinned && weight_total < 0.5))
|
||||
if (0 == num_btls_used || (!mca_pml_ob1.leave_pinned && weight_total < 0.5))
|
||||
return 0;
|
||||
|
||||
mca_pml_ob1_calc_weighted_length(rdma_btls, num_btls_used, size,
|
||||
@ -103,10 +97,6 @@ size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
|
||||
for(i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) {
|
||||
rdma_btls[i].bml_btl =
|
||||
mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
|
||||
if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool)
|
||||
rdma_btls[i].btl_reg = NULL;
|
||||
else
|
||||
rdma_btls[i].btl_reg = &pml_ob1_dummy_reg;
|
||||
|
||||
weight_total += rdma_btls[i].bml_btl->btl_weight;
|
||||
}
|
||||
|
@ -32,38 +32,52 @@ typedef enum {
|
||||
MCA_PML_OB1_RDMA_GET
|
||||
} mca_pml_ob1_rdma_state_t;
|
||||
|
||||
struct mca_pml_ob1_rdma_frag_t;
|
||||
|
||||
typedef void (*mca_pml_ob1_rdma_frag_callback_t)(struct mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length);
|
||||
|
||||
/**
|
||||
* Used to keep track of local and remote RDMA operations.
|
||||
*/
|
||||
struct mca_pml_ob1_rdma_frag_t {
|
||||
ompi_free_list_item_t super;
|
||||
mca_bml_base_btl_t* rdma_bml;
|
||||
mca_bml_base_btl_t *rdma_bml;
|
||||
mca_pml_ob1_hdr_t rdma_hdr;
|
||||
mca_pml_ob1_rdma_state_t rdma_state;
|
||||
size_t rdma_length;
|
||||
uint8_t rdma_segs[MCA_BTL_SEG_MAX_SIZE * MCA_BTL_DES_MAX_SEGMENTS];
|
||||
void *rdma_req;
|
||||
struct mca_bml_base_endpoint_t* rdma_ep;
|
||||
opal_convertor_t convertor;
|
||||
mca_mpool_base_registration_t* reg;
|
||||
uint32_t retries;
|
||||
mca_pml_ob1_rdma_frag_callback_t cbfunc;
|
||||
|
||||
uint64_t rdma_offset;
|
||||
void *local_address;
|
||||
mca_btl_base_registration_handle_t *local_handle;
|
||||
|
||||
uint64_t remote_address;
|
||||
uint8_t remote_handle[MCA_BTL_REG_HANDLE_MAX_SIZE];
|
||||
};
|
||||
typedef struct mca_pml_ob1_rdma_frag_t mca_pml_ob1_rdma_frag_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_pml_ob1_rdma_frag_t);
|
||||
|
||||
|
||||
#define MCA_PML_OB1_RDMA_FRAG_ALLOC(frag) \
|
||||
do { \
|
||||
ompi_free_list_item_t* item; \
|
||||
#define MCA_PML_OB1_RDMA_FRAG_ALLOC(frag) \
|
||||
do { \
|
||||
ompi_free_list_item_t* item; \
|
||||
OMPI_FREE_LIST_WAIT_MT(&mca_pml_ob1.rdma_frags, item); \
|
||||
frag = (mca_pml_ob1_rdma_frag_t*)item; \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_OB1_RDMA_FRAG_RETURN(frag) \
|
||||
do { \
|
||||
/* return fragment */ \
|
||||
OMPI_FREE_LIST_RETURN_MT(&mca_pml_ob1.rdma_frags, \
|
||||
(ompi_free_list_item_t*)frag); \
|
||||
frag = (mca_pml_ob1_rdma_frag_t*)item; \
|
||||
} while(0)
|
||||
|
||||
#define MCA_PML_OB1_RDMA_FRAG_RETURN(frag) \
|
||||
do { \
|
||||
/* return fragment */ \
|
||||
if (frag->local_handle) { \
|
||||
mca_bml_base_deregister_mem (frag->rdma_bml, frag->local_handle); \
|
||||
frag->local_handle = NULL; \
|
||||
} \
|
||||
OMPI_FREE_LIST_RETURN_MT(&mca_pml_ob1.rdma_frags, \
|
||||
(ompi_free_list_item_t*)frag); \
|
||||
} while (0)
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -295,6 +295,7 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_ob1_send_request_t* sendreq;
|
||||
size_t size;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
|
||||
return;
|
||||
@ -307,19 +308,25 @@ void mca_pml_ob1_recv_frag_callback_ack(mca_btl_base_module_t* btl,
|
||||
/* if the request should be delivered entirely by copy in/out
|
||||
* then throttle sends */
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA) {
|
||||
if (NULL != sendreq->src_des) {
|
||||
/* release registered memory */
|
||||
mca_bml_base_free (sendreq->req_rdma[0].bml_btl, sendreq->src_des);
|
||||
sendreq->src_des = NULL;
|
||||
if (NULL != sendreq->rdma_frag) {
|
||||
if (NULL != sendreq->rdma_frag->local_handle) {
|
||||
mca_bml_base_deregister_mem (sendreq->req_rdma[0].bml_btl, sendreq->rdma_frag->local_handle);
|
||||
sendreq->rdma_frag->local_handle = NULL;
|
||||
}
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(sendreq->rdma_frag);
|
||||
sendreq->rdma_frag = NULL;
|
||||
}
|
||||
|
||||
sendreq->req_throttle_sends = true;
|
||||
}
|
||||
|
||||
mca_pml_ob1_send_request_copy_in_out(sendreq,
|
||||
hdr->hdr_ack.hdr_send_offset,
|
||||
sendreq->req_send.req_bytes_packed -
|
||||
hdr->hdr_ack.hdr_send_offset);
|
||||
|
||||
if (hdr->hdr_ack.hdr_send_size) {
|
||||
size = hdr->hdr_ack.hdr_send_size;
|
||||
} else {
|
||||
size = sendreq->req_send.req_bytes_packed - hdr->hdr_ack.hdr_send_offset;
|
||||
}
|
||||
|
||||
mca_pml_ob1_send_request_copy_in_out(sendreq, hdr->hdr_ack.hdr_send_offset, size);
|
||||
|
||||
if (sendreq->req_state != 0) {
|
||||
/* Typical receipt of an ACK message causes req_state to be
|
||||
@ -362,6 +369,7 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_FRAG);
|
||||
recvreq = (mca_pml_ob1_recv_request_t*)hdr->hdr_frag.hdr_dst_req.pval;
|
||||
#if OPAL_CUDA_SUPPORT /* CUDA_ASYNC_RECV */
|
||||
@ -380,6 +388,7 @@ void mca_pml_ob1_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
return;
|
||||
}
|
||||
#endif /* OPAL_CUDA_SUPPORT */
|
||||
|
||||
mca_pml_ob1_recv_request_progress_frag(recvreq,btl,segments,des->des_local_count);
|
||||
|
||||
return;
|
||||
@ -411,19 +420,16 @@ void mca_pml_ob1_recv_frag_callback_fin(mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
void* cbdata ) {
|
||||
mca_btl_base_segment_t* segments = des->des_local;
|
||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||
mca_btl_base_descriptor_t* rdma;
|
||||
mca_pml_ob1_fin_hdr_t* hdr = (mca_pml_ob1_fin_hdr_t *) segments->seg_addr.pval;
|
||||
mca_pml_ob1_rdma_frag_t *frag;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_common_hdr_t)) ) {
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_ob1_fin_hdr_t)) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
ob1_hdr_ntoh(hdr, MCA_PML_OB1_HDR_TYPE_FIN);
|
||||
rdma = (mca_btl_base_descriptor_t*)hdr->hdr_fin.hdr_des.pval;
|
||||
rdma->des_cbfunc(btl, NULL, rdma,
|
||||
hdr->hdr_fin.hdr_fail ? OMPI_ERROR : OMPI_SUCCESS);
|
||||
|
||||
return;
|
||||
frag = (mca_pml_ob1_rdma_frag_t *) hdr->hdr_frag.pval;
|
||||
frag->cbfunc (frag, hdr->hdr_size);
|
||||
}
|
||||
|
||||
|
||||
@ -699,7 +705,7 @@ out_of_order_match:
|
||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||
|
||||
if(OPAL_LIKELY(match)) {
|
||||
switch(type) {
|
||||
switch(type) {
|
||||
case MCA_PML_OB1_HDR_TYPE_MATCH:
|
||||
mca_pml_ob1_recv_request_progress_match(match, btl, segments, num_segments);
|
||||
break;
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
|
||||
* Copyright (c) 2012-2013 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 FUJITSU LIMITED. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -183,31 +183,27 @@ static void mca_pml_ob1_recv_ctl_completion( mca_btl_base_module_t* btl,
|
||||
* Put operation has completed remotely - update request status
|
||||
*/
|
||||
|
||||
static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
static void mca_pml_ob1_put_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_size)
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_cbdata;
|
||||
size_t bytes_received = 0;
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
|
||||
assert (rdma_size == frag->rdma_length);
|
||||
|
||||
if( OPAL_LIKELY(status == OMPI_SUCCESS) ) {
|
||||
bytes_received = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
|
||||
(void *) des->des_local,
|
||||
des->des_local_count, 0);
|
||||
}
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
|
||||
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
|
||||
/* check completion status */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, bytes_received);
|
||||
if(recv_request_pml_complete_check(recvreq) == false &&
|
||||
if (OPAL_LIKELY(0 < rdma_size)) {
|
||||
/* check completion status */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, (size_t) rdma_size);
|
||||
if (recv_request_pml_complete_check(recvreq) == false &&
|
||||
recvreq->req_rdma_offset < recvreq->req_send_offset) {
|
||||
/* schedule additional rdma operations */
|
||||
mca_pml_ob1_recv_request_schedule(recvreq, bml_btl);
|
||||
/* schedule additional rdma operations */
|
||||
mca_pml_ob1_recv_request_schedule(recvreq, bml_btl);
|
||||
}
|
||||
}
|
||||
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
@ -218,7 +214,7 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
int mca_pml_ob1_recv_request_ack_send_btl(
|
||||
ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
|
||||
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
|
||||
bool nordma)
|
||||
size_t size, bool nordma)
|
||||
{
|
||||
mca_btl_base_descriptor_t* des;
|
||||
mca_pml_ob1_ack_hdr_t* ack;
|
||||
@ -234,11 +230,8 @@ int mca_pml_ob1_recv_request_ack_send_btl(
|
||||
|
||||
/* fill out header */
|
||||
ack = (mca_pml_ob1_ack_hdr_t*)des->des_local->seg_addr.pval;
|
||||
ack->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK;
|
||||
ack->hdr_common.hdr_flags = nordma ? MCA_PML_OB1_HDR_FLAGS_NORDMA : 0;
|
||||
ack->hdr_src_req.lval = hdr_src_req;
|
||||
ack->hdr_dst_req.pval = hdr_dst_req;
|
||||
ack->hdr_send_offset = hdr_send_offset;
|
||||
mca_pml_ob1_ack_hdr_prepare (ack, nordma ? MCA_PML_OB1_HDR_FLAGS_NORDMA : 0,
|
||||
hdr_src_req, hdr_dst_req, hdr_send_offset, size);
|
||||
|
||||
ob1_hdr_hton(ack, MCA_PML_OB1_HDR_TYPE_ACK, proc);
|
||||
|
||||
@ -312,63 +305,99 @@ static int mca_pml_ob1_recv_request_ack(
|
||||
if(recvreq->req_send_offset == hdr->hdr_msg_length)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* let know to shedule function there is no need to put ACK flag */
|
||||
recvreq->req_ack_sent = true;
|
||||
return mca_pml_ob1_recv_request_ack_send(proc, hdr->hdr_src_req.lval,
|
||||
recvreq, recvreq->req_send_offset,
|
||||
recvreq, recvreq->req_send_offset, 0,
|
||||
recvreq->req_send_offset == bytes_received);
|
||||
}
|
||||
|
||||
static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag);
|
||||
|
||||
static int mca_pml_ob1_recv_request_get_frag_failed (mca_pml_ob1_rdma_frag_t *frag, int rc)
|
||||
{
|
||||
mca_pml_ob1_recv_request_t *recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
ompi_proc_t *proc = (ompi_proc_t *) recvreq->req_recv.req_base.req_proc;
|
||||
|
||||
if (OMPI_ERR_NOT_AVAILABLE == rc) {
|
||||
/* get isn't supported for this transfer. tell peer to fallback on put */
|
||||
rc = mca_pml_ob1_recv_request_put_frag (frag);
|
||||
if (OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append (&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
if (++frag->retries < mca_pml_ob1.rdma_retries_limit &&
|
||||
OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* tell peer to fall back on send for this region */
|
||||
rc = mca_pml_ob1_recv_request_ack_send(proc, frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req.lval,
|
||||
recvreq, frag->rdma_offset, frag->rdma_length, false);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return resources used by the RDMA
|
||||
*/
|
||||
|
||||
static void mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* ep,
|
||||
void *local_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
void *context, void *cbdata, int status)
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = (mca_bml_base_btl_t *) context;
|
||||
mca_pml_ob1_rdma_frag_t *frag = (mca_pml_ob1_rdma_frag_t *) cbdata;
|
||||
mca_pml_ob1_recv_request_t *recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(status);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||
status = mca_pml_ob1_recv_request_get_frag_failed (frag, status);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(status);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
} else {
|
||||
/* is receive request complete */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
/* TODO: re-add order */
|
||||
mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc,
|
||||
bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag,
|
||||
frag->rdma_length, 0, 0);
|
||||
|
||||
recv_request_pml_complete_check(recvreq);
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
}
|
||||
|
||||
/* is receive request complete */
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_bytes_received, frag->rdma_length);
|
||||
if (recvreq->req_recv.req_bytes_packed <= recvreq->req_bytes_received) {
|
||||
mca_pml_ob1_send_fin(recvreq->req_recv.req_base.req_proc,
|
||||
bml_btl,
|
||||
frag->rdma_hdr.hdr_rget.hdr_des,
|
||||
des->order, 0);
|
||||
}
|
||||
|
||||
recv_request_pml_complete_check(recvreq);
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
|
||||
static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
|
||||
mca_btl_base_descriptor_t *dst) {
|
||||
static int mca_pml_ob1_recv_request_put_frag (mca_pml_ob1_rdma_frag_t *frag)
|
||||
{
|
||||
mca_pml_ob1_recv_request_t *recvreq = (mca_pml_ob1_recv_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
mca_btl_base_descriptor_t *ctl;
|
||||
mca_pml_ob1_rdma_hdr_t *hdr;
|
||||
size_t seg_size;
|
||||
size_t reg_size;
|
||||
int rc;
|
||||
|
||||
seg_size = bml_btl->btl->btl_seg_size * dst->des_local_count;
|
||||
reg_size = bml_btl->btl->btl_registration_handle_size;
|
||||
|
||||
/* prepare a descriptor for rdma control message */
|
||||
mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof (mca_pml_ob1_rdma_hdr_t) + seg_size,
|
||||
mca_bml_base_alloc (bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof (mca_pml_ob1_rdma_hdr_t) + reg_size,
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
if (OPAL_UNLIKELY(NULL == ctl)) {
|
||||
@ -378,25 +407,18 @@ static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
|
||||
|
||||
/* fill in rdma header */
|
||||
hdr = (mca_pml_ob1_rdma_hdr_t *) ctl->des_local->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT;
|
||||
hdr->hdr_common.hdr_flags =
|
||||
(!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0;
|
||||
mca_pml_ob1_rdma_hdr_prepare (hdr, (!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0,
|
||||
recvreq->remote_req_send.lval, frag, recvreq, frag->rdma_offset,
|
||||
frag->local_address, frag->rdma_length, frag->local_handle,
|
||||
reg_size);
|
||||
|
||||
hdr->hdr_req = frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req;
|
||||
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
||||
hdr->hdr_des.pval = dst;
|
||||
hdr->hdr_recv_req.pval = recvreq;
|
||||
frag->cbfunc = mca_pml_ob1_put_completion;
|
||||
|
||||
hdr->hdr_seg_cnt = dst->des_local_count;
|
||||
recvreq->req_ack_sent = true;
|
||||
|
||||
/* copy segments */
|
||||
memcpy (hdr + 1, dst->des_local, seg_size);
|
||||
|
||||
dst->des_cbfunc = mca_pml_ob1_put_completion;
|
||||
dst->des_cbdata = recvreq;
|
||||
|
||||
if (!recvreq->req_ack_sent)
|
||||
recvreq->req_ack_sent = true;
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(recvreq->req_recv.req_base), size,
|
||||
PERUSE_RECV);
|
||||
|
||||
/* send rdma request to peer */
|
||||
rc = mca_bml_base_send (bml_btl, ctl, MCA_PML_OB1_HDR_TYPE_PUT);
|
||||
@ -411,71 +433,30 @@ static int mca_pml_ob1_init_get_fallback (mca_pml_ob1_rdma_frag_t *frag,
|
||||
/*
|
||||
*
|
||||
*/
|
||||
int mca_pml_ob1_recv_request_get_frag( mca_pml_ob1_rdma_frag_t* frag )
|
||||
int mca_pml_ob1_recv_request_get_frag (mca_pml_ob1_rdma_frag_t *frag)
|
||||
{
|
||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)frag->rdma_req;
|
||||
mca_bml_base_btl_t* bml_btl = frag->rdma_bml;
|
||||
mca_btl_base_descriptor_t* descriptor;
|
||||
size_t save_size = frag->rdma_length;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
int rc;
|
||||
|
||||
/* prepare descriptor */
|
||||
mca_bml_base_prepare_dst( bml_btl,
|
||||
NULL,
|
||||
&recvreq->req_recv.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER,
|
||||
0,
|
||||
&frag->rdma_length,
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK |
|
||||
MCA_BTL_DES_FLAGS_GET,
|
||||
&descriptor );
|
||||
if( OPAL_UNLIKELY(NULL == descriptor) ) {
|
||||
if (frag->retries < mca_pml_ob1.rdma_retries_limit) {
|
||||
frag->rdma_length = save_size;
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
} else {
|
||||
ompi_proc_t *proc = (ompi_proc_t *) recvreq->req_recv.req_base.req_proc;
|
||||
|
||||
/* tell peer to fall back on send */
|
||||
recvreq->req_send_offset = 0;
|
||||
rc = mca_pml_ob1_recv_request_ack_send(proc, frag->rdma_hdr.hdr_rget.hdr_rndv.hdr_src_req.lval,
|
||||
recvreq, recvreq->req_send_offset, true);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
return rc;
|
||||
if (bml_btl->btl->btl_register_mem && !frag->local_handle) {
|
||||
mca_bml_base_register_mem (bml_btl, frag->local_address, frag->rdma_length, MCA_BTL_REG_FLAG_LOCAL_WRITE |
|
||||
MCA_BTL_REG_FLAG_REMOTE_WRITE, &frag->local_handle);
|
||||
if (OPAL_UNLIKELY(NULL == frag->local_handle)) {
|
||||
return mca_pml_ob1_recv_request_get_frag_failed (frag, OMPI_ERR_OUT_OF_RESOURCE);
|
||||
}
|
||||
}
|
||||
|
||||
descriptor->des_remote = (mca_btl_base_segment_t *) frag->rdma_segs;
|
||||
descriptor->des_remote_count = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
|
||||
descriptor->des_cbfunc = mca_pml_ob1_rget_completion;
|
||||
descriptor->des_cbdata = frag;
|
||||
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT(PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(recvreq->req_recv.req_base),
|
||||
&(((mca_pml_ob1_recv_request_t *) frag->rdma_req)->req_recv.req_base),
|
||||
frag->rdma_length, PERUSE_RECV);
|
||||
|
||||
/* queue up get request */
|
||||
rc = mca_bml_base_get(bml_btl,descriptor);
|
||||
rc = mca_bml_base_get (bml_btl, frag->local_address, frag->remote_address, frag->local_handle,
|
||||
(mca_btl_base_registration_handle_t *) frag->remote_handle, frag->rdma_length,
|
||||
0, mca_pml_ob1_rget_completion, frag);
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != rc) ) {
|
||||
if (OPAL_UNLIKELY(OMPI_ERR_NOT_AVAILABLE == rc)) {
|
||||
/* get isn't supported for this transfer. tell peer to fallback on put */
|
||||
rc = mca_pml_ob1_init_get_fallback (frag, descriptor);
|
||||
}
|
||||
|
||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
mca_bml_base_free(bml_btl, descriptor);
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending,
|
||||
(opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
} else if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
OMPI_ERROR_LOG(rc);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
return mca_pml_ob1_recv_request_get_frag_failed (frag, OMPI_ERR_OUT_OF_RESOURCE);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -501,6 +482,7 @@ void mca_pml_ob1_recv_request_progress_frag( mca_pml_ob1_recv_request_t* recvreq
|
||||
bytes_received = mca_pml_ob1_compute_segment_length_base (segments, num_segments,
|
||||
sizeof(mca_pml_ob1_frag_hdr_t));
|
||||
data_offset = hdr->hdr_frag.hdr_frag_offset;
|
||||
|
||||
/*
|
||||
* Make user buffer accessible(defined) before unpacking.
|
||||
*/
|
||||
@ -628,7 +610,6 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
mca_pml_ob1_rget_hdr_t* hdr = (mca_pml_ob1_rget_hdr_t*)segments->seg_addr.pval;
|
||||
mca_bml_base_endpoint_t* bml_endpoint = NULL;
|
||||
size_t bytes_remaining, prev_sent, offset;
|
||||
mca_btl_base_segment_t *r_segments;
|
||||
mca_pml_ob1_rdma_frag_t *frag;
|
||||
mca_bml_base_btl_t *rdma_bml;
|
||||
int rc;
|
||||
@ -636,6 +617,7 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
prev_sent = offset = 0;
|
||||
bytes_remaining = hdr->hdr_rndv.hdr_msg_length;
|
||||
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
|
||||
recvreq->req_send_offset = 0;
|
||||
|
||||
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq, &hdr->hdr_rndv.hdr_match);
|
||||
|
||||
@ -679,8 +661,10 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
|
||||
bytes_remaining = mca_pml_ob1_compute_segment_length_remote (btl->btl_seg_size, (void *)(hdr + 1),
|
||||
hdr->hdr_seg_cnt, recvreq->req_recv.req_base.req_proc);
|
||||
bytes_remaining = hdr->hdr_rndv.hdr_msg_length;
|
||||
|
||||
/* save the request for put fallback */
|
||||
recvreq->remote_req_send = hdr->hdr_rndv.hdr_src_req;
|
||||
|
||||
/* The while loop adds a fragmentation mechanism. The variable bytes_remaining holds the num
|
||||
* of bytes left to be send. In each iteration we send the max possible bytes supported
|
||||
@ -689,7 +673,12 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
* the next iteration with the updated size.
|
||||
* Also - In each iteration we update the location in the buffer to be used for writing
|
||||
* the message ,and the location to read from. This is done using the offset variable that
|
||||
* accumulates the number of bytes that were sent so far. */
|
||||
* accumulates the number of bytes that were sent so far.
|
||||
*
|
||||
* NTH: This fragmentation may go away if we change the btls to require them to handle
|
||||
* get fragmentation internally. This is a reasonable solution since some btls do not
|
||||
* need any fragmentation (sm, vader, self, etc). Remove this loop if this ends up
|
||||
* being the case. */
|
||||
while (bytes_remaining > 0) {
|
||||
/* allocate/initialize a fragment */
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
@ -699,29 +688,31 @@ void mca_pml_ob1_recv_request_progress_rget( mca_pml_ob1_recv_request_t* recvreq
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
|
||||
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
|
||||
memcpy (frag->remote_handle, hdr + 1, btl->btl_registration_handle_size);
|
||||
|
||||
memcpy (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
|
||||
|
||||
/* update the read location -- NTH: note this will only work if there is exactly one
|
||||
segment. TODO -- make this work with multiple segments */
|
||||
r_segments = (mca_btl_base_segment_t *) frag->rdma_segs;
|
||||
r_segments->seg_addr.lval += offset;
|
||||
/* update the read location */
|
||||
frag->remote_address = hdr->hdr_src_ptr + offset;
|
||||
|
||||
/* updating the write location */
|
||||
OPAL_THREAD_LOCK(&recvreq->lock);
|
||||
opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor, &offset);
|
||||
opal_convertor_get_current_pointer (&recvreq->req_recv.req_base.req_convertor, &frag->local_address);
|
||||
OPAL_THREAD_UNLOCK(&recvreq->lock);
|
||||
|
||||
frag->rdma_bml = rdma_bml;
|
||||
|
||||
frag->rdma_hdr.hdr_rget = *hdr;
|
||||
frag->retries = 0;
|
||||
frag->rdma_req = recvreq;
|
||||
frag->rdma_ep = bml_endpoint;
|
||||
frag->rdma_state = MCA_PML_OB1_RDMA_GET;
|
||||
frag->reg = NULL;
|
||||
frag->rdma_length = bytes_remaining;
|
||||
frag->retries = 0;
|
||||
frag->rdma_req = recvreq;
|
||||
frag->rdma_state = MCA_PML_OB1_RDMA_GET;
|
||||
frag->local_handle = NULL;
|
||||
frag->rdma_offset = offset;
|
||||
|
||||
if (bytes_remaining > rdma_bml->btl->btl_get_limit) {
|
||||
frag->rdma_length = rdma_bml->btl->btl_get_limit;
|
||||
} else {
|
||||
frag->rdma_length = bytes_remaining;
|
||||
}
|
||||
|
||||
/* NTH: TODO -- handle error conditions gracefully */
|
||||
rc = mca_pml_ob1_recv_request_get_frag(frag);
|
||||
@ -920,13 +911,11 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
|
||||
|
||||
while(bytes_remaining > 0 &&
|
||||
recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) {
|
||||
size_t size, seg_size;
|
||||
mca_pml_ob1_rdma_hdr_t* hdr;
|
||||
mca_btl_base_descriptor_t* dst;
|
||||
mca_btl_base_descriptor_t* ctl;
|
||||
mca_mpool_base_registration_t * reg = NULL;
|
||||
mca_btl_base_module_t* btl;
|
||||
mca_pml_ob1_rdma_frag_t *frag = NULL;
|
||||
mca_btl_base_module_t *btl;
|
||||
int rc, rdma_idx;
|
||||
void *data_ptr;
|
||||
size_t size;
|
||||
|
||||
if(prev_bytes_remaining == bytes_remaining) {
|
||||
if(++num_fail == num_tries) {
|
||||
@ -947,85 +936,62 @@ int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
|
||||
do {
|
||||
rdma_idx = recvreq->req_rdma_idx;
|
||||
bml_btl = recvreq->req_rdma[rdma_idx].bml_btl;
|
||||
reg = recvreq->req_rdma[rdma_idx].btl_reg;
|
||||
size = recvreq->req_rdma[rdma_idx].length;
|
||||
if(++recvreq->req_rdma_idx >= recvreq->req_rdma_cnt)
|
||||
recvreq->req_rdma_idx = 0;
|
||||
} while(!size);
|
||||
btl = bml_btl->btl;
|
||||
|
||||
/* makes sure that we don't exceed BTL max rdma size
|
||||
* if memory is not pinned already */
|
||||
if( (NULL == reg) && (btl->btl_rdma_pipeline_frag_size != 0) &&
|
||||
(size > btl->btl_rdma_pipeline_frag_size)) {
|
||||
/* NTH: This conditional used to check if there was a registration in
|
||||
* recvreq->req_rdma[rdma_idx].btl_reg. If once existed it was due to
|
||||
* the btl not needed registration (equivalent to btl->btl_register_mem
|
||||
* != NULL. This new check is equivalent. Note: I feel this protocol
|
||||
* needs work to better improve resource usage when running with a
|
||||
* leave pinned protocol. */
|
||||
if (btl->btl_register_mem && (btl->btl_rdma_pipeline_frag_size != 0) &&
|
||||
(size > btl->btl_rdma_pipeline_frag_size)) {
|
||||
size = btl->btl_rdma_pipeline_frag_size;
|
||||
}
|
||||
|
||||
/* take lock to protect converter against concurrent access
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* take lock to protect convertor against concurrent access
|
||||
* from unpack */
|
||||
OPAL_THREAD_LOCK(&recvreq->lock);
|
||||
opal_convertor_set_position( &recvreq->req_recv.req_base.req_convertor,
|
||||
&recvreq->req_rdma_offset );
|
||||
|
||||
/* prepare a descriptor for RDMA */
|
||||
mca_bml_base_prepare_dst(bml_btl, reg,
|
||||
&recvreq->req_recv.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
|
||||
MCA_BTL_DES_FLAGS_PUT, &dst);
|
||||
opal_convertor_set_position (&recvreq->req_recv.req_base.req_convertor,
|
||||
&recvreq->req_rdma_offset);
|
||||
opal_convertor_get_current_pointer (&recvreq->req_recv.req_base.req_convertor, &data_ptr);
|
||||
OPAL_THREAD_UNLOCK(&recvreq->lock);
|
||||
|
||||
if(OPAL_UNLIKELY(dst == NULL)) {
|
||||
continue;
|
||||
if (btl->btl_register_mem) {
|
||||
mca_bml_base_register_mem (bml_btl, data_ptr, size, MCA_BTL_REG_FLAG_REMOTE_WRITE,
|
||||
&frag->local_handle);
|
||||
if (OPAL_UNLIKELY(NULL == frag->local_handle)) {
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
dst->des_cbfunc = mca_pml_ob1_put_completion;
|
||||
dst->des_cbdata = recvreq;
|
||||
/* fill in the minimum information needed to handle the fin message */
|
||||
frag->cbfunc = mca_pml_ob1_put_completion;
|
||||
frag->rdma_length = size;
|
||||
frag->rdma_req = recvreq;
|
||||
frag->rdma_bml = bml_btl;
|
||||
frag->local_address = data_ptr;
|
||||
frag->rdma_offset = recvreq->req_rdma_offset;
|
||||
|
||||
seg_size = btl->btl_seg_size * dst->des_local_count;
|
||||
|
||||
/* prepare a descriptor for rdma control message */
|
||||
mca_bml_base_alloc(bml_btl, &ctl, MCA_BTL_NO_ORDER, sizeof(mca_pml_ob1_rdma_hdr_t) + seg_size,
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
|
||||
if( OPAL_UNLIKELY(NULL == ctl) ) {
|
||||
mca_bml_base_free(bml_btl,dst);
|
||||
continue;
|
||||
}
|
||||
ctl->des_cbfunc = mca_pml_ob1_recv_ctl_completion;
|
||||
|
||||
/* fill in rdma header */
|
||||
hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_local->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT;
|
||||
hdr->hdr_common.hdr_flags =
|
||||
(!recvreq->req_ack_sent) ? MCA_PML_OB1_HDR_TYPE_ACK : 0;
|
||||
hdr->hdr_req = recvreq->remote_req_send;
|
||||
hdr->hdr_des.pval = dst;
|
||||
hdr->hdr_recv_req.pval = recvreq;
|
||||
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
||||
hdr->hdr_seg_cnt = dst->des_local_count;
|
||||
|
||||
/* copy segments */
|
||||
memmove (hdr + 1, dst->des_local, seg_size);
|
||||
|
||||
if(!recvreq->req_ack_sent)
|
||||
recvreq->req_ack_sent = true;
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_PUT, recvreq->req_recv.req_base.req_proc);
|
||||
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(recvreq->req_recv.req_base), size,
|
||||
PERUSE_RECV);
|
||||
|
||||
/* send rdma request to peer */
|
||||
rc = mca_bml_base_send(bml_btl, ctl, MCA_PML_OB1_HDR_TYPE_PUT);
|
||||
if( OPAL_LIKELY( rc >= 0 ) ) {
|
||||
rc = mca_pml_ob1_recv_request_put_frag (frag);
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
|
||||
/* update request state */
|
||||
recvreq->req_rdma_offset += size;
|
||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth, 1);
|
||||
recvreq->req_rdma[rdma_idx].length -= size;
|
||||
bytes_remaining -= size;
|
||||
} else {
|
||||
mca_bml_base_free(bml_btl,ctl);
|
||||
mca_bml_base_free(bml_btl,dst);
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -10,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -131,7 +132,7 @@ do { \
|
||||
#define MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq) \
|
||||
{ \
|
||||
MCA_PML_BASE_RECV_REQUEST_FINI(&(recvreq)->req_recv); \
|
||||
OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_recv_requests, \
|
||||
OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_recv_requests, \
|
||||
(ompi_free_list_item_t*)(recvreq)); \
|
||||
}
|
||||
|
||||
@ -154,9 +155,11 @@ recv_request_pml_complete(mca_pml_ob1_recv_request_t *recvreq)
|
||||
}
|
||||
|
||||
for(i = 0; i < recvreq->req_rdma_cnt; i++) {
|
||||
mca_mpool_base_registration_t* btl_reg = recvreq->req_rdma[i].btl_reg;
|
||||
if( NULL != btl_reg && btl_reg->mpool != NULL) {
|
||||
btl_reg->mpool->mpool_deregister( btl_reg->mpool, btl_reg );
|
||||
struct mca_btl_base_registration_handle_t *handle = recvreq->req_rdma[i].btl_reg;
|
||||
mca_bml_base_btl_t *bml_btl = recvreq->req_rdma[i].bml_btl;
|
||||
|
||||
if (NULL != handle) {
|
||||
mca_bml_base_deregister_mem (bml_btl, handle);
|
||||
}
|
||||
}
|
||||
recvreq->req_rdma_cnt = 0;
|
||||
@ -387,7 +390,7 @@ static inline void mca_pml_ob1_recv_request_schedule(
|
||||
(void)mca_pml_ob1_recv_request_schedule_exclusive(req, start_bml_btl);
|
||||
}
|
||||
|
||||
#define MCA_PML_OB1_ADD_ACK_TO_PENDING(P, S, D, O) \
|
||||
#define MCA_PML_OB1_ADD_ACK_TO_PENDING(P, S, D, O, Sz) \
|
||||
do { \
|
||||
mca_pml_ob1_pckt_pending_t *_pckt; \
|
||||
\
|
||||
@ -396,6 +399,7 @@ static inline void mca_pml_ob1_recv_request_schedule(
|
||||
_pckt->hdr.hdr_ack.hdr_src_req.lval = (S); \
|
||||
_pckt->hdr.hdr_ack.hdr_dst_req.pval = (D); \
|
||||
_pckt->hdr.hdr_ack.hdr_send_offset = (O); \
|
||||
_pckt->hdr.hdr_ack.hdr_send_size = (Sz); \
|
||||
_pckt->proc = (P); \
|
||||
_pckt->bml_btl = NULL; \
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock); \
|
||||
@ -406,11 +410,11 @@ static inline void mca_pml_ob1_recv_request_schedule(
|
||||
|
||||
int mca_pml_ob1_recv_request_ack_send_btl(ompi_proc_t* proc,
|
||||
mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
|
||||
uint64_t hdr_rdma_offset, bool nordma);
|
||||
uint64_t hdr_rdma_offset, uint64_t size, bool nordma);
|
||||
|
||||
static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
|
||||
uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
|
||||
bool nordma)
|
||||
uint64_t size, bool nordma)
|
||||
{
|
||||
size_t i;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
@ -420,12 +424,12 @@ static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
|
||||
for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
if(mca_pml_ob1_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
|
||||
hdr_dst_req, hdr_send_offset, nordma) == OMPI_SUCCESS)
|
||||
hdr_dst_req, hdr_send_offset, size, nordma) == OMPI_SUCCESS)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
MCA_PML_OB1_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,
|
||||
hdr_send_offset);
|
||||
hdr_send_offset, size);
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
@ -250,27 +250,18 @@ mca_pml_ob1_rndv_completion( mca_btl_base_module_t* btl,
|
||||
*/
|
||||
|
||||
static void
|
||||
mca_pml_ob1_rget_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
mca_pml_ob1_rget_completion (mca_pml_ob1_rdma_frag_t *frag, int64_t rdma_length)
|
||||
{
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||
size_t req_bytes_delivered;
|
||||
mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
|
||||
/* count bytes of user data actually delivered and check for request completion */
|
||||
if (OPAL_LIKELY(OMPI_SUCCESS == status)) {
|
||||
req_bytes_delivered = mca_pml_ob1_compute_segment_length (btl->btl_seg_size,
|
||||
(void *) des->des_local,
|
||||
des->des_local_count, 0);
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, req_bytes_delivered);
|
||||
if (OPAL_LIKELY(0 < rdma_length)) {
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, (size_t) rdma_length);
|
||||
}
|
||||
sendreq->src_des = NULL;
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
/* free the descriptor */
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
}
|
||||
|
||||
@ -407,17 +398,14 @@ int mca_pml_ob1_send_request_start_buffered(
|
||||
|
||||
/* build rendezvous header */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, 0,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_send.req_bytes_packed, sendreq);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* update lengths */
|
||||
segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data;
|
||||
@ -490,15 +478,13 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
if(NULL != bml_btl->btl->btl_sendi) {
|
||||
mca_pml_ob1_match_hdr_t match;
|
||||
match.hdr_common.hdr_flags = 0;
|
||||
match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
mca_pml_ob1_match_hdr_prepare (&match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence);
|
||||
|
||||
ob1_hdr_hton(&match, MCA_PML_OB1_HDR_TYPE_MATCH,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton (&match, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* try to send immediately */
|
||||
rc = mca_bml_base_sendi( bml_btl, &sendreq->req_send.req_base.req_convertor,
|
||||
@ -565,15 +551,13 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
/* build match header */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* update lengths */
|
||||
segment->seg_len = OMPI_PML_OB1_MATCH_HDR_LEN + max_data;
|
||||
@ -631,15 +615,13 @@ int mca_pml_ob1_send_request_start_prepare( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
/* build match header */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
mca_pml_ob1_match_hdr_prepare (&hdr->hdr_match, MCA_PML_OB1_HDR_TYPE_MATCH, 0,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_MATCH, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* short message */
|
||||
des->des_cbfunc = mca_pml_ob1_match_completion_free;
|
||||
@ -673,79 +655,67 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq,
|
||||
* one RDMA capable BTLs). This way round robin distribution of RDMA
|
||||
* operation is achieved.
|
||||
*/
|
||||
|
||||
mca_btl_base_descriptor_t *des, *src = NULL;
|
||||
mca_btl_base_registration_handle_t *local_handle;
|
||||
mca_btl_base_descriptor_t *des;
|
||||
mca_pml_ob1_rdma_frag_t *frag;
|
||||
mca_pml_ob1_rget_hdr_t *hdr;
|
||||
size_t seg_size;
|
||||
size_t reg_size;
|
||||
void *data_ptr;
|
||||
int rc;
|
||||
|
||||
sendreq->src_des = NULL;
|
||||
|
||||
bml_btl = sendreq->req_rdma[0].bml_btl;
|
||||
if (!(bml_btl->btl_flags & (MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_CUDA_GET))) {
|
||||
sendreq->rdma_frag = NULL;
|
||||
/* This BTL does not support get. Use rendezvous to start the RDMA operation using put instead. */
|
||||
return mca_pml_ob1_send_request_start_rndv (sendreq, bml_btl, 0, MCA_PML_OB1_HDR_FLAGS_CONTIG |
|
||||
MCA_PML_OB1_HDR_FLAGS_PIN);
|
||||
}
|
||||
|
||||
MEMCHECKER(
|
||||
memchecker_call(&opal_memchecker_base_mem_defined,
|
||||
sendreq->req_send.req_base.req_addr,
|
||||
sendreq->req_send.req_base.req_count,
|
||||
sendreq->req_send.req_base.req_datatype);
|
||||
);
|
||||
/* prepare source descriptor/segment(s) */
|
||||
/* PML owns this descriptor and will free it in */
|
||||
/* mca_pml_ob1_rget_completion */
|
||||
mca_bml_base_prepare_src( bml_btl, sendreq->req_rdma[0].btl_reg,
|
||||
&sendreq->req_send.req_base.req_convertor,
|
||||
MCA_BTL_NO_ORDER, 0, &size, MCA_BTL_DES_FLAGS_GET |
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP, &src );
|
||||
MEMCHECKER(
|
||||
memchecker_call(&opal_memchecker_base_mem_noaccess,
|
||||
sendreq->req_send.req_base.req_addr,
|
||||
sendreq->req_send.req_base.req_count,
|
||||
sendreq->req_send.req_base.req_datatype);
|
||||
);
|
||||
if( OPAL_UNLIKELY(NULL == src) ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
/* at this time ob1 does not support non-contiguous gets. the convertor represents a
|
||||
* contiguous block of memory */
|
||||
opal_convertor_get_current_pointer (&sendreq->req_send.req_base.req_convertor, &data_ptr);
|
||||
|
||||
local_handle = sendreq->req_rdma[0].btl_reg;
|
||||
|
||||
/* allocate an rdma fragment to keep track of the request size for use in the fin message */
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
src->des_cbfunc = mca_pml_ob1_rget_completion;
|
||||
src->des_cbdata = sendreq;
|
||||
|
||||
sendreq->src_des = src;
|
||||
/* fill in necessary fragment data */
|
||||
frag->rdma_req = sendreq;
|
||||
frag->rdma_bml = bml_btl;
|
||||
frag->rdma_length = size;
|
||||
frag->cbfunc = mca_pml_ob1_rget_completion;
|
||||
/* do not store the local handle in the fragment. it will be released by mca_pml_ob1_free_rdma_resources */
|
||||
|
||||
seg_size = bml_btl->btl->btl_seg_size * src->des_local_count;
|
||||
/* save the fragment for get->put fallback */
|
||||
sendreq->rdma_frag = frag;
|
||||
|
||||
reg_size = bml_btl->btl->btl_registration_handle_size;
|
||||
|
||||
/* allocate space for get hdr + segment list */
|
||||
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + seg_size,
|
||||
mca_bml_base_alloc(bml_btl, &des, MCA_BTL_NO_ORDER, sizeof (*hdr) + reg_size,
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
if( OPAL_UNLIKELY(NULL == des) ) {
|
||||
/* NTH: no need to reset the converter here. it will be reset before it is retried */
|
||||
mca_bml_base_free(bml_btl, src);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* build match header */
|
||||
hdr = (mca_pml_ob1_rget_hdr_t *) des->des_local->seg_addr.pval;
|
||||
|
||||
hdr->hdr_rndv.hdr_match.hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
|
||||
hdr->hdr_rndv.hdr_match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
|
||||
hdr->hdr_rndv.hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_rndv.hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_rndv.hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_rndv.hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
hdr->hdr_des.pval = src;
|
||||
hdr->hdr_seg_cnt = src->des_local_count;
|
||||
/* TODO -- Add support for multiple segments for get */
|
||||
mca_pml_ob1_rget_hdr_prepare (hdr, MCA_PML_OB1_HDR_FLAGS_CONTIG | MCA_PML_OB1_HDR_FLAGS_PIN,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_send.req_bytes_packed, sendreq,
|
||||
frag, data_ptr, local_handle, reg_size);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RGET, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* copy segment data */
|
||||
memcpy (hdr + 1, src->des_local, seg_size);
|
||||
|
||||
des->des_cbfunc = mca_pml_ob1_send_ctl_completion;
|
||||
des->des_cbdata = sendreq;
|
||||
|
||||
@ -763,12 +733,6 @@ int mca_pml_ob1_send_request_start_rdma( mca_pml_ob1_send_request_t* sendreq,
|
||||
rc = mca_bml_base_send(bml_btl, des, MCA_PML_OB1_HDR_TYPE_RGET);
|
||||
if (OPAL_UNLIKELY(rc < 0)) {
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
|
||||
if (sendreq->src_des) {
|
||||
mca_bml_base_free (bml_btl, sendreq->src_des);
|
||||
sendreq->src_des = NULL;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -828,17 +792,14 @@ int mca_pml_ob1_send_request_start_rndv( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
/* build hdr */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = flags;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = (uint16_t)sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||
mca_pml_ob1_rendezvous_hdr_prepare (&hdr->hdr_rndv, MCA_PML_OB1_HDR_TYPE_RNDV, flags,
|
||||
sendreq->req_send.req_base.req_comm->c_contextid,
|
||||
sendreq->req_send.req_base.req_comm->c_my_rank,
|
||||
sendreq->req_send.req_base.req_tag,
|
||||
(uint16_t)sendreq->req_send.req_base.req_sequence,
|
||||
sendreq->req_send.req_bytes_packed, sendreq);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_RNDV, sendreq->req_send.req_base.req_proc);
|
||||
|
||||
/* first fragment of a long message */
|
||||
des->des_cbdata = sendreq;
|
||||
@ -1047,11 +1008,8 @@ cannot_pack:
|
||||
|
||||
/* setup header */
|
||||
hdr = (mca_pml_ob1_frag_hdr_t*)des->des_local->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = 0;
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FRAG;
|
||||
hdr->hdr_frag_offset = range->range_send_offset;
|
||||
hdr->hdr_src_req.pval = sendreq;
|
||||
hdr->hdr_dst_req = sendreq->req_recv;
|
||||
mca_pml_ob1_frag_hdr_prepare (hdr, 0, range->range_send_offset, sendreq,
|
||||
sendreq->req_recv.lval);
|
||||
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_FRAG,
|
||||
sendreq->req_send.req_base.req_proc);
|
||||
@ -1108,38 +1066,66 @@ cannot_pack:
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A put fragment could not be started. Queue the fragment to be retried later or
|
||||
* fall back on send/recv.
|
||||
*/
|
||||
static void mca_pml_ob1_send_request_put_frag_failed (mca_pml_ob1_rdma_frag_t *frag, int rc)
|
||||
{
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
|
||||
if (++frag->retries < mca_pml_ob1.rdma_retries_limit && OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
/* queue the frag for later if there was a resource error */
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
} else {
|
||||
/* tell receiver to deregister memory */
|
||||
mca_pml_ob1_send_fin (sendreq->req_send.req_base.req_proc,
|
||||
bml_btl, frag->rdma_hdr.hdr_rdma.hdr_frag,
|
||||
0, MCA_BTL_NO_ORDER, 1);
|
||||
|
||||
/* send fragment by copy in/out */
|
||||
mca_pml_ob1_send_request_copy_in_out(sendreq, frag->rdma_hdr.hdr_rdma.hdr_rdma_offset,
|
||||
frag->rdma_length);
|
||||
/* if a pointer to a receive request is not set it means that
|
||||
* ACK was not yet received. Don't schedule sends before ACK */
|
||||
if (NULL != sendreq->req_recv.pval)
|
||||
mca_pml_ob1_send_request_schedule (sendreq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* An RDMA put operation has completed:
|
||||
* (1) Update request status and if required set completed
|
||||
* (2) Send FIN control message to the destination
|
||||
* (2) Send FIN control message to the destination
|
||||
*/
|
||||
|
||||
static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
int status )
|
||||
static void mca_pml_ob1_put_completion (mca_btl_base_module_t* btl, struct mca_btl_base_endpoint_t* ep,
|
||||
void *local_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
void *context, void *cbdata, int status)
|
||||
{
|
||||
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)frag->rdma_req;
|
||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||
mca_pml_ob1_rdma_frag_t *frag = (mca_pml_ob1_rdma_frag_t *) cbdata;
|
||||
mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
|
||||
mca_bml_base_btl_t *bml_btl = (mca_bml_base_btl_t *) context;
|
||||
|
||||
/* check completion status */
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS != status) ) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(status);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
if( OPAL_UNLIKELY(OMPI_SUCCESS == status) ) {
|
||||
/* TODO -- readd ordering */
|
||||
mca_pml_ob1_send_fin (sendreq->req_send.req_base.req_proc, bml_btl,
|
||||
frag->rdma_hdr.hdr_rdma.hdr_frag, frag->rdma_length,
|
||||
0, 0);
|
||||
|
||||
/* check for request completion */
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
} else {
|
||||
/* try to fall back on send/recv */
|
||||
mca_pml_ob1_send_request_put_frag_failed (frag, status);
|
||||
}
|
||||
|
||||
mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc,
|
||||
bml_btl,
|
||||
frag->rdma_hdr.hdr_rdma.hdr_des,
|
||||
des->order, 0);
|
||||
|
||||
/* check for request completion */
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_bytes_delivered, frag->rdma_length);
|
||||
|
||||
send_request_pml_complete_check(sendreq);
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||
|
||||
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
|
||||
@ -1147,81 +1133,45 @@ static void mca_pml_ob1_put_completion( mca_btl_base_module_t* btl,
|
||||
|
||||
int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag )
|
||||
{
|
||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)frag->rdma_req;
|
||||
mca_mpool_base_registration_t *reg = NULL;
|
||||
mca_pml_ob1_send_request_t *sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
|
||||
mca_btl_base_registration_handle_t *local_handle = NULL;
|
||||
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
|
||||
mca_btl_base_descriptor_t *des;
|
||||
size_t save_size = frag->rdma_length;
|
||||
int rc;
|
||||
|
||||
if (OPAL_LIKELY(NULL == sendreq->src_des)) {
|
||||
/* setup descriptor */
|
||||
mca_bml_base_prepare_src( bml_btl,
|
||||
reg,
|
||||
&frag->convertor,
|
||||
MCA_BTL_NO_ORDER,
|
||||
0,
|
||||
&frag->rdma_length,
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP |
|
||||
MCA_BTL_DES_FLAGS_PUT,
|
||||
&des );
|
||||
|
||||
if( OPAL_UNLIKELY(NULL == des) ) {
|
||||
if(frag->retries < mca_pml_ob1.rdma_retries_limit) {
|
||||
size_t offset = (size_t)frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
|
||||
frag->rdma_length = save_size;
|
||||
opal_convertor_set_position(&frag->convertor, &offset);
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
} else {
|
||||
mca_pml_ob1_send_request_t *sendreq =
|
||||
(mca_pml_ob1_send_request_t*)frag->rdma_req;
|
||||
if (bml_btl->btl->btl_register_mem && NULL == frag->local_handle) {
|
||||
/* Check if the segment is already registered */
|
||||
for (size_t i = 0 ; i < sendreq->req_rdma_cnt ; ++i) {
|
||||
if (sendreq->req_rdma[i].bml_btl == frag->rdma_bml) {
|
||||
/* do not copy the handle to the fragment to avoid deregistring it twice */
|
||||
local_handle = sendreq->req_rdma[i].btl_reg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* tell receiver to unregister memory */
|
||||
mca_pml_ob1_send_fin(sendreq->req_send.req_base.req_proc,
|
||||
bml_btl, frag->rdma_hdr.hdr_rdma.hdr_des,
|
||||
MCA_BTL_NO_ORDER, 1);
|
||||
if (NULL == frag->local_handle) {
|
||||
/* Not already registered. Register the region with the BTL. */
|
||||
mca_bml_base_register_mem (bml_btl, frag->local_address, frag->rdma_length, 0,
|
||||
&frag->local_handle);
|
||||
|
||||
/* send fragment by copy in/out */
|
||||
mca_pml_ob1_send_request_copy_in_out(sendreq,
|
||||
frag->rdma_hdr.hdr_rdma.hdr_rdma_offset, frag->rdma_length);
|
||||
/* if a pointer to a receive request is not set it means that
|
||||
* ACK was not yet received. Don't schedule sends before ACK */
|
||||
if(NULL != sendreq->req_recv.pval)
|
||||
mca_pml_ob1_send_request_schedule(sendreq);
|
||||
if (OPAL_UNLIKELY(NULL == frag->local_handle)) {
|
||||
mca_pml_ob1_send_request_put_frag_failed (frag, OMPI_ERR_OUT_OF_RESOURCE);
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
local_handle = frag->local_handle;
|
||||
}
|
||||
} else {
|
||||
/* already have a source descriptor */
|
||||
des = sendreq->src_des;
|
||||
sendreq->src_des = NULL;
|
||||
}
|
||||
|
||||
des->des_remote = (mca_btl_base_segment_t *) frag->rdma_segs;
|
||||
des->des_remote_count = frag->rdma_hdr.hdr_rdma.hdr_seg_cnt;
|
||||
des->des_cbfunc = mca_pml_ob1_put_completion;
|
||||
des->des_cbdata = frag;
|
||||
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(((mca_pml_ob1_send_request_t*)frag->rdma_req)->req_send.req_base), save_size, PERUSE_SEND );
|
||||
|
||||
rc = mca_bml_base_put(bml_btl, des);
|
||||
rc = mca_bml_base_put (bml_btl, frag->local_address, frag->remote_address, local_handle,
|
||||
(mca_btl_base_registration_handle_t *) frag->remote_handle, frag->rdma_length,
|
||||
0, mca_pml_ob1_put_completion, frag);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
mca_bml_base_free(bml_btl, des);
|
||||
frag->rdma_length = save_size;
|
||||
if(OMPI_ERR_OUT_OF_RESOURCE == rc) {
|
||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
|
||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
} else {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(rc);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
mca_pml_ob1_send_request_put_frag_failed (frag, rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -1235,12 +1185,11 @@ int mca_pml_ob1_send_request_put_frag( mca_pml_ob1_rdma_frag_t *frag )
|
||||
*/
|
||||
|
||||
void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_pml_ob1_rdma_hdr_t* hdr )
|
||||
{
|
||||
mca_bml_base_endpoint_t *bml_endpoint = sendreq->req_endpoint;
|
||||
mca_pml_ob1_rdma_frag_t* frag;
|
||||
size_t i, size = 0;
|
||||
|
||||
if(hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_TYPE_ACK) {
|
||||
OPAL_THREAD_ADD32(&sendreq->req_state, -1);
|
||||
@ -1248,61 +1197,36 @@ void mca_pml_ob1_send_request_put( mca_pml_ob1_send_request_t* sendreq,
|
||||
|
||||
sendreq->req_recv.pval = hdr->hdr_recv_req.pval;
|
||||
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
if (NULL == sendreq->rdma_frag) {
|
||||
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag);
|
||||
|
||||
if( OPAL_UNLIKELY(NULL == frag) ) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
|
||||
assert (btl->btl_seg_size * hdr->hdr_seg_cnt <= sizeof (frag->rdma_segs));
|
||||
|
||||
/* setup fragment */
|
||||
memcpy (frag->rdma_segs, hdr + 1, btl->btl_seg_size * hdr->hdr_seg_cnt);
|
||||
|
||||
for( i = 0; i < hdr->hdr_seg_cnt; i++ ) {
|
||||
mca_btl_base_segment_t *seg = (mca_btl_base_segment_t *) ((uintptr_t)(frag->rdma_segs) + i * btl->btl_seg_size);
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
if ((sendreq->req_send.req_base.req_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
|
||||
(ompi_proc_local()->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
|
||||
size += opal_swap_bytes4(seg->seg_len);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
size += seg->seg_len;
|
||||
if( OPAL_UNLIKELY(NULL == frag) ) {
|
||||
/* TSW - FIX */
|
||||
OMPI_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
ompi_rte_abort(-1, NULL);
|
||||
}
|
||||
} else {
|
||||
/* rget fallback on put */
|
||||
frag = sendreq->rdma_frag;
|
||||
sendreq->rdma_frag = NULL;
|
||||
sendreq->req_state = 0;
|
||||
}
|
||||
|
||||
/* copy registration data */
|
||||
memcpy (frag->remote_handle, hdr + 1, btl->btl_registration_handle_size);
|
||||
|
||||
frag->rdma_bml = mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma, btl);
|
||||
frag->rdma_hdr.hdr_rdma = *hdr;
|
||||
frag->rdma_req = sendreq;
|
||||
frag->rdma_ep = bml_endpoint;
|
||||
frag->rdma_length = size;
|
||||
frag->rdma_length = hdr->hdr_dst_size;
|
||||
frag->rdma_state = MCA_PML_OB1_RDMA_PUT;
|
||||
frag->reg = NULL;
|
||||
frag->remote_address = hdr->hdr_dst_ptr;
|
||||
frag->retries = 0;
|
||||
|
||||
if (OPAL_UNLIKELY(NULL != sendreq->src_des)) {
|
||||
/* get fallback path */
|
||||
sendreq->req_state = 0;
|
||||
}
|
||||
|
||||
/* lookup the corresponding registration */
|
||||
for(i=0; i<sendreq->req_rdma_cnt; i++) {
|
||||
if(sendreq->req_rdma[i].bml_btl == frag->rdma_bml) {
|
||||
frag->reg = sendreq->req_rdma[i].btl_reg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* RDMA writes may proceed in parallel to send and to each other, so
|
||||
* create clone of the convertor for each RDMA fragment
|
||||
*/
|
||||
size = hdr->hdr_rdma_offset;
|
||||
opal_convertor_clone_with_position(&sendreq->req_send.req_base.req_convertor,
|
||||
&frag->convertor, 0, &size);
|
||||
/* Get the address of the current offset. Note: at this time ob1 CAN NOT handle
|
||||
* non-contiguous RDMA. If that changes this code will be wrong. */
|
||||
opal_convertor_get_offset_pointer (&sendreq->req_send.req_base.req_convertor,
|
||||
hdr->hdr_rdma_offset, &frag->local_address);
|
||||
|
||||
mca_pml_ob1_send_request_put_frag(frag);
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -54,7 +54,7 @@ struct mca_pml_ob1_send_request_t {
|
||||
mca_pml_ob1_send_pending_t req_pending;
|
||||
opal_mutex_t req_send_range_lock;
|
||||
opal_list_t req_send_ranges;
|
||||
mca_btl_base_descriptor_t *src_des;
|
||||
mca_pml_ob1_rdma_frag_t *rdma_frag;
|
||||
mca_pml_ob1_com_btl_t req_rdma[1];
|
||||
};
|
||||
typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t;
|
||||
@ -124,10 +124,9 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type)
|
||||
ompi_free_list_item_t* item; \
|
||||
\
|
||||
if( OPAL_LIKELY(NULL != proc) ) { \
|
||||
OMPI_FREE_LIST_WAIT_MT(&mca_pml_base_send_requests, item); \
|
||||
OMPI_FREE_LIST_WAIT_MT(&mca_pml_base_send_requests, item); \
|
||||
sendreq = (mca_pml_ob1_send_request_t*)item; \
|
||||
sendreq->req_send.req_base.req_proc = proc; \
|
||||
sendreq->src_des = NULL; \
|
||||
} \
|
||||
}
|
||||
|
||||
@ -163,15 +162,18 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type)
|
||||
assert( 0 == _position ); \
|
||||
}
|
||||
|
||||
static inline void mca_pml_ob1_free_rdma_resources(mca_pml_ob1_send_request_t* sendreq)
|
||||
static inline void mca_pml_ob1_free_rdma_resources (mca_pml_ob1_send_request_t* sendreq)
|
||||
{
|
||||
size_t r;
|
||||
|
||||
/* return mpool resources */
|
||||
for(r = 0; r < sendreq->req_rdma_cnt; r++) {
|
||||
mca_mpool_base_registration_t* reg = sendreq->req_rdma[r].btl_reg;
|
||||
if( NULL != reg && reg->mpool != NULL ) {
|
||||
reg->mpool->mpool_deregister(reg->mpool, reg);
|
||||
struct mca_btl_base_registration_handle_t *handle = sendreq->req_rdma[r].btl_reg;
|
||||
mca_bml_base_btl_t *bml_btl = sendreq->req_rdma[r].bml_btl;
|
||||
|
||||
if (NULL != handle) {
|
||||
mca_bml_base_deregister_mem (bml_btl, handle);
|
||||
sendreq->req_rdma[r].btl_reg = NULL;
|
||||
}
|
||||
}
|
||||
sendreq->req_rdma_cnt = 0;
|
||||
@ -218,10 +220,14 @@ do {
|
||||
|
||||
#define MCA_PML_OB1_SEND_REQUEST_RETURN(sendreq) \
|
||||
do { \
|
||||
/* Let the base handle the reference counts */ \
|
||||
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \
|
||||
OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_send_requests, \
|
||||
(ompi_free_list_item_t*)sendreq); \
|
||||
/* Let the base handle the reference counts */ \
|
||||
MCA_PML_BASE_SEND_REQUEST_FINI((&(sendreq)->req_send)); \
|
||||
if (sendreq->rdma_frag) { \
|
||||
MCA_PML_OB1_RDMA_FRAG_RETURN (sendreq->rdma_frag); \
|
||||
sendreq->rdma_frag = NULL; \
|
||||
} \
|
||||
OMPI_FREE_LIST_RETURN_MT( &mca_pml_base_send_requests, \
|
||||
(ompi_free_list_item_t*)sendreq); \
|
||||
} while(0)
|
||||
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -217,6 +217,14 @@ static inline void opal_convertor_get_current_pointer( const opal_convertor_t* p
|
||||
*position = (void*)base;
|
||||
}
|
||||
|
||||
static inline void opal_convertor_get_offset_pointer( const opal_convertor_t* pConv,
|
||||
size_t offset, void** position )
|
||||
{
|
||||
unsigned char* base = pConv->pBaseBuf + offset + pConv->pDesc->true_lb;
|
||||
*position = (void*)base;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user