- added get based protocol (if supported by btl) for pre-registered memory
- removed 8 bytes from the majority of the pml headers This commit was SVN r6916.
Этот коммит содержится в:
родитель
e8c103ac1f
Коммит
f274f524ab
@ -247,6 +247,15 @@ static inline int mca_bml_base_put(mca_bml_base_btl_t* bml_btl, mca_btl_base_des
|
|||||||
des);
|
des);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int mca_bml_base_get(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des) {
|
||||||
|
des->des_context = (void*) bml_btl;
|
||||||
|
return bml_btl->btl_get(
|
||||||
|
bml_btl->btl,
|
||||||
|
bml_btl->btl_endpoint,
|
||||||
|
des);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline void mca_bml_base_prepare_src(mca_bml_base_btl_t* bml_btl,
|
static inline void mca_bml_base_prepare_src(mca_bml_base_btl_t* bml_btl,
|
||||||
mca_mpool_base_registration_t* reg,
|
mca_mpool_base_registration_t* reg,
|
||||||
struct ompi_convertor_t* conv,
|
struct ompi_convertor_t* conv,
|
||||||
|
@ -353,7 +353,7 @@ int mca_bml_r2_add_procs(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* check flags - is rdma prefered */
|
/* check flags - is rdma prefered */
|
||||||
if(btl->btl_flags & MCA_BTL_FLAGS_RDMA &&
|
if(btl->btl_flags & (MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_GET) &&
|
||||||
proc->proc_arch == ompi_proc_local_proc->proc_arch) {
|
proc->proc_arch == ompi_proc_local_proc->proc_arch) {
|
||||||
mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
|
mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
|
||||||
*bml_btl_rdma = *bml_btl;
|
*bml_btl_rdma = *bml_btl;
|
||||||
|
@ -134,7 +134,8 @@ typedef uint8_t mca_btl_base_tag_t;
|
|||||||
|
|
||||||
/* prefered protocol */
|
/* prefered protocol */
|
||||||
#define MCA_BTL_FLAGS_SEND 0x1
|
#define MCA_BTL_FLAGS_SEND 0x1
|
||||||
#define MCA_BTL_FLAGS_RDMA 0x2
|
#define MCA_BTL_FLAGS_PUT 0x2
|
||||||
|
#define MCA_BTL_FLAGS_GET 0x4
|
||||||
|
|
||||||
/* btl can send directly from user buffer w/out registration */
|
/* btl can send directly from user buffer w/out registration */
|
||||||
#define MCA_BTL_FLAGS_SEND_INPLACE 0x10000000
|
#define MCA_BTL_FLAGS_SEND_INPLACE 0x10000000
|
||||||
|
@ -151,7 +151,7 @@ int mca_btl_gm_component_open(void)
|
|||||||
mca_btl_gm_param_register_int("max_rdma_size", 128*1024);
|
mca_btl_gm_param_register_int("max_rdma_size", 128*1024);
|
||||||
#if OMPI_MCA_BTL_GM_SUPPORT_REGISTERING && OMPI_MCA_BTL_GM_HAVE_RDMA_PUT
|
#if OMPI_MCA_BTL_GM_SUPPORT_REGISTERING && OMPI_MCA_BTL_GM_HAVE_RDMA_PUT
|
||||||
mca_btl_gm_module.super.btl_flags =
|
mca_btl_gm_module.super.btl_flags =
|
||||||
mca_btl_gm_param_register_int("flags", MCA_BTL_FLAGS_RDMA);
|
mca_btl_gm_param_register_int("flags", MCA_BTL_FLAGS_PUT);
|
||||||
#else
|
#else
|
||||||
mca_btl_gm_module.super.btl_flags = MCA_BTL_FLAGS_SEND;
|
mca_btl_gm_module.super.btl_flags = MCA_BTL_FLAGS_SEND;
|
||||||
#endif
|
#endif
|
||||||
|
@ -55,7 +55,7 @@ mca_btl_mvapi_module_t mca_btl_mvapi_module = {
|
|||||||
mca_btl_mvapi_prepare_dst,
|
mca_btl_mvapi_prepare_dst,
|
||||||
mca_btl_mvapi_send,
|
mca_btl_mvapi_send,
|
||||||
mca_btl_mvapi_put,
|
mca_btl_mvapi_put,
|
||||||
NULL /* get */
|
mca_btl_mvapi_get
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -770,8 +770,8 @@ int mca_btl_mvapi_send(
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
int mca_btl_mvapi_put( mca_btl_base_module_t* btl,
|
int mca_btl_mvapi_put( mca_btl_base_module_t* btl,
|
||||||
mca_btl_base_endpoint_t* endpoint,
|
mca_btl_base_endpoint_t* endpoint,
|
||||||
mca_btl_base_descriptor_t* descriptor)
|
mca_btl_base_descriptor_t* descriptor)
|
||||||
{
|
{
|
||||||
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
||||||
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
|
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
|
||||||
@ -801,7 +801,42 @@ int mca_btl_mvapi_put( mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* RDMA read remote buffer to local buffer address.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_mvapi_get( mca_btl_base_module_t* btl,
|
||||||
|
mca_btl_base_endpoint_t* endpoint,
|
||||||
|
mca_btl_base_descriptor_t* descriptor)
|
||||||
|
{
|
||||||
|
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
||||||
|
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
|
||||||
|
frag->endpoint = endpoint;
|
||||||
|
frag->sr_desc.opcode = VAPI_RDMA_READ;
|
||||||
|
|
||||||
|
frag->sr_desc.remote_qp = endpoint->rem_qp_num_low;
|
||||||
|
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
|
||||||
|
frag->sr_desc.r_key = frag->base.des_src->seg_key.key32[0];
|
||||||
|
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval;
|
||||||
|
frag->sg_entry.len = frag->base.des_dst->seg_len;
|
||||||
|
|
||||||
|
frag->ret = VAPI_post_sr(mvapi_btl->nic,
|
||||||
|
endpoint->lcl_qp_hndl_low,
|
||||||
|
&frag->sr_desc);
|
||||||
|
if(VAPI_OK != frag->ret){
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
if(mca_btl_mvapi_component.use_srq) {
|
||||||
|
MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, 1);
|
||||||
|
MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 1);
|
||||||
|
} else {
|
||||||
|
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 1);
|
||||||
|
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 1);
|
||||||
|
}
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Asynchronous event handler to detect unforseen
|
* Asynchronous event handler to detect unforseen
|
||||||
|
@ -395,6 +395,24 @@ extern int mca_btl_mvapi_put(
|
|||||||
struct mca_btl_base_descriptor_t* decriptor
|
struct mca_btl_base_descriptor_t* decriptor
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PML->BTL Initiate a get of the specified size.
|
||||||
|
*
|
||||||
|
* @param btl (IN) BTL instance
|
||||||
|
* @param btl_base_peer (IN) BTL peer addressing
|
||||||
|
* @param send_request (IN/OUT) Send request (allocated by PML via mca_btl_base_request_alloc_fn_t)
|
||||||
|
* @param size (IN) Number of bytes PML is requesting BTL to deliver
|
||||||
|
* @param flags (IN) Flags that should be passed to the peer via the message header.
|
||||||
|
* @param request (OUT) OMPI_SUCCESS if the BTL was able to queue one or more fragments
|
||||||
|
*/
|
||||||
|
extern int mca_btl_mvapi_get(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* btl_peer,
|
||||||
|
struct mca_btl_base_descriptor_t* decriptor
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allocate a descriptor.
|
* Allocate a descriptor.
|
||||||
*
|
*
|
||||||
|
@ -201,7 +201,7 @@ int mca_btl_mvapi_component_open(void)
|
|||||||
1024*1024);
|
1024*1024);
|
||||||
mca_btl_mvapi_module.super.btl_flags =
|
mca_btl_mvapi_module.super.btl_flags =
|
||||||
mca_btl_mvapi_param_register_int("flags",
|
mca_btl_mvapi_param_register_int("flags",
|
||||||
MCA_BTL_FLAGS_RDMA);
|
MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_GET);
|
||||||
|
|
||||||
|
|
||||||
param = mca_base_param_find("mpi", NULL, "leave_pinned");
|
param = mca_base_param_find("mpi", NULL, "leave_pinned");
|
||||||
@ -574,6 +574,7 @@ int mca_btl_mvapi_component_progress()
|
|||||||
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
|
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
|
case VAPI_CQE_SQ_RDMA_READ:
|
||||||
case VAPI_CQE_SQ_RDMA_WRITE:
|
case VAPI_CQE_SQ_RDMA_WRITE:
|
||||||
case VAPI_CQE_SQ_SEND_DATA :
|
case VAPI_CQE_SQ_SEND_DATA :
|
||||||
|
|
||||||
@ -632,6 +633,7 @@ int mca_btl_mvapi_component_progress()
|
|||||||
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
|
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
|
case VAPI_CQE_SQ_RDMA_READ:
|
||||||
case VAPI_CQE_SQ_RDMA_WRITE:
|
case VAPI_CQE_SQ_RDMA_WRITE:
|
||||||
case VAPI_CQE_SQ_SEND_DATA :
|
case VAPI_CQE_SQ_SEND_DATA :
|
||||||
|
|
||||||
|
@ -126,7 +126,7 @@ int mca_btl_mx_component_open(void)
|
|||||||
mca_btl_mx_module.super.btl_max_rdma_size =
|
mca_btl_mx_module.super.btl_max_rdma_size =
|
||||||
mca_btl_mx_param_register_int("max_rdma_size", 1024*1024);
|
mca_btl_mx_param_register_int("max_rdma_size", 1024*1024);
|
||||||
mca_btl_mx_module.super.btl_flags =
|
mca_btl_mx_module.super.btl_flags =
|
||||||
mca_btl_mx_param_register_int("flags", MCA_BTL_FLAGS_RDMA);
|
mca_btl_mx_param_register_int("flags", MCA_BTL_FLAGS_PUT);
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,7 +204,7 @@ int mca_btl_openib_component_open(void)
|
|||||||
1024*1024);
|
1024*1024);
|
||||||
mca_btl_openib_module.super.btl_flags =
|
mca_btl_openib_module.super.btl_flags =
|
||||||
mca_btl_openib_param_register_int("flags",
|
mca_btl_openib_param_register_int("flags",
|
||||||
MCA_BTL_FLAGS_RDMA);
|
MCA_BTL_FLAGS_PUT);
|
||||||
|
|
||||||
|
|
||||||
param = mca_base_param_find("mpi", NULL, "leave_pinned");
|
param = mca_base_param_find("mpi", NULL, "leave_pinned");
|
||||||
|
@ -202,7 +202,7 @@ mca_btl_portals_component_open(void)
|
|||||||
&dummy);
|
&dummy);
|
||||||
mca_btl_portals_module.super.btl_bandwidth = dummy;
|
mca_btl_portals_module.super.btl_bandwidth = dummy;
|
||||||
|
|
||||||
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
|
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE;
|
||||||
|
|
||||||
mca_btl_portals_module.portals_num_procs = 0;
|
mca_btl_portals_module.portals_num_procs = 0;
|
||||||
bzero(&(mca_btl_portals_module.portals_reg),
|
bzero(&(mca_btl_portals_module.portals_reg),
|
||||||
|
@ -119,7 +119,7 @@ int mca_btl_self_component_open(void)
|
|||||||
mca_btl_self.btl_exclusivity =
|
mca_btl_self.btl_exclusivity =
|
||||||
mca_btl_self_param_register_int("exclusivity", 64*1024);
|
mca_btl_self_param_register_int("exclusivity", 64*1024);
|
||||||
mca_btl_self.btl_flags =
|
mca_btl_self.btl_flags =
|
||||||
mca_btl_self_param_register_int("flags", MCA_BTL_FLAGS_RDMA);
|
mca_btl_self_param_register_int("flags", MCA_BTL_FLAGS_PUT);
|
||||||
|
|
||||||
/* initialize objects */
|
/* initialize objects */
|
||||||
OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t);
|
OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t);
|
||||||
|
@ -218,7 +218,7 @@ int mca_btl_tcp_component_open(void)
|
|||||||
mca_btl_tcp_module.super.btl_max_rdma_size =
|
mca_btl_tcp_module.super.btl_max_rdma_size =
|
||||||
mca_btl_tcp_param_register_int("max_rdma_size", INT_MAX);
|
mca_btl_tcp_param_register_int("max_rdma_size", INT_MAX);
|
||||||
mca_btl_tcp_module.super.btl_flags =
|
mca_btl_tcp_module.super.btl_flags =
|
||||||
mca_btl_tcp_param_register_int("flags", MCA_BTL_FLAGS_RDMA|MCA_BTL_FLAGS_SEND_INPLACE);
|
mca_btl_tcp_param_register_int("flags", MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_SEND_INPLACE);
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -128,7 +128,7 @@ int mca_btl_template_component_open(void)
|
|||||||
mca_btl_template_module.super.btl_max_rdma_size =
|
mca_btl_template_module.super.btl_max_rdma_size =
|
||||||
mca_btl_template_param_register_int("max_rdma_size", 1024*1024);
|
mca_btl_template_param_register_int("max_rdma_size", 1024*1024);
|
||||||
mca_btl_template_module.super.btl_flags =
|
mca_btl_template_module.super.btl_flags =
|
||||||
mca_btl_template_param_register_int("flags", MCA_BTL_FLAGS_RDMA);
|
mca_btl_template_param_register_int("flags", MCA_BTL_FLAGS_PUT);
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,7 +82,7 @@ int mca_mpool_mvapi_register(mca_mpool_base_module_t* mpool,
|
|||||||
vapi_reg->hndl = VAPI_INVAL_HNDL;
|
vapi_reg->hndl = VAPI_INVAL_HNDL;
|
||||||
|
|
||||||
|
|
||||||
mr_in.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE;
|
mr_in.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE | VAPI_EN_REMOTE_READ;
|
||||||
mr_in.l_key = 0;
|
mr_in.l_key = 0;
|
||||||
mr_in.r_key = 0;
|
mr_in.r_key = 0;
|
||||||
mr_in.pd_hndl = mpool_module->hca_pd.pd_tag;
|
mr_in.pd_hndl = mpool_module->hca_pd.pd_tag;
|
||||||
|
@ -20,7 +20,6 @@
|
|||||||
#define MCA_PML_OB1_HEADER_H
|
#define MCA_PML_OB1_HEADER_H
|
||||||
|
|
||||||
#include "ompi_config.h"
|
#include "ompi_config.h"
|
||||||
#include "mca/ptl/ptl.h"
|
|
||||||
#ifdef HAVE_SYS_TYPES_H
|
#ifdef HAVE_SYS_TYPES_H
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#endif
|
#endif
|
||||||
@ -28,15 +27,16 @@
|
|||||||
#include <netinet/in.h>
|
#include <netinet/in.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define MCA_PML_OB1_HDR_TYPE_MATCH 1
|
#define MCA_PML_OB1_HDR_TYPE_MATCH 1
|
||||||
#define MCA_PML_OB1_HDR_TYPE_RNDV 2
|
#define MCA_PML_OB1_HDR_TYPE_RNDV 2
|
||||||
#define MCA_PML_OB1_HDR_TYPE_ACK 3
|
#define MCA_PML_OB1_HDR_TYPE_RGET 3
|
||||||
#define MCA_PML_OB1_HDR_TYPE_NACK 4
|
#define MCA_PML_OB1_HDR_TYPE_ACK 4
|
||||||
#define MCA_PML_OB1_HDR_TYPE_FRAG 5
|
#define MCA_PML_OB1_HDR_TYPE_NACK 5
|
||||||
#define MCA_PML_OB1_HDR_TYPE_GET 6
|
#define MCA_PML_OB1_HDR_TYPE_FRAG 6
|
||||||
#define MCA_PML_OB1_HDR_TYPE_PUT 7
|
#define MCA_PML_OB1_HDR_TYPE_GET 7
|
||||||
#define MCA_PML_OB1_HDR_TYPE_FIN 8
|
#define MCA_PML_OB1_HDR_TYPE_PUT 8
|
||||||
#define MCA_PML_OB1_HDR_TYPE_MAX 9
|
#define MCA_PML_OB1_HDR_TYPE_FIN 9
|
||||||
|
#define MCA_PML_OB1_HDR_TYPE_MAX 10
|
||||||
|
|
||||||
#define MCA_PML_OB1_HDR_FLAGS_ACK 1 /* is an ack required */
|
#define MCA_PML_OB1_HDR_FLAGS_ACK 1 /* is an ack required */
|
||||||
#define MCA_PML_OB1_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
|
#define MCA_PML_OB1_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
|
||||||
@ -93,7 +93,6 @@ struct mca_pml_ob1_common_hdr_t {
|
|||||||
typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t;
|
typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t;
|
||||||
|
|
||||||
#define MCA_PML_OB1_COMMON_HDR_NTOH(h)
|
#define MCA_PML_OB1_COMMON_HDR_NTOH(h)
|
||||||
|
|
||||||
#define MCA_PML_OB1_COMMON_HDR_HTON(h)
|
#define MCA_PML_OB1_COMMON_HDR_HTON(h)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -102,35 +101,32 @@ typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t;
|
|||||||
*/
|
*/
|
||||||
struct mca_pml_ob1_match_hdr_t {
|
struct mca_pml_ob1_match_hdr_t {
|
||||||
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
||||||
uint16_t hdr_contextid; /**< communicator index */
|
uint16_t hdr_ctx; /**< communicator index */
|
||||||
int32_t hdr_src; /**< source rank */
|
int32_t hdr_src; /**< source rank */
|
||||||
int32_t hdr_dst; /**< destination rank */
|
int32_t hdr_dst; /**< destination rank */
|
||||||
int32_t hdr_tag; /**< user tag */
|
int32_t hdr_tag; /**< user tag */
|
||||||
uint64_t hdr_msg_length; /**< message length */
|
uint16_t hdr_seq; /**< message sequence number */
|
||||||
uint16_t hdr_msg_seq; /**< message sequence number */
|
|
||||||
};
|
};
|
||||||
typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
|
typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
|
||||||
|
|
||||||
#define MCA_PML_OB1_MATCH_HDR_NTOH(h) \
|
#define MCA_PML_OB1_MATCH_HDR_NTOH(h) \
|
||||||
do { \
|
do { \
|
||||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||||
(h).hdr_contextid = ntohs((h).hdr_contextid); \
|
(h).hdr_ctx = ntohs((h).hdr_ctx); \
|
||||||
(h).hdr_src = ntohl((h).hdr_src); \
|
(h).hdr_src = ntohl((h).hdr_src); \
|
||||||
(h).hdr_dst = ntohl((h).hdr_dst); \
|
(h).hdr_dst = ntohl((h).hdr_dst); \
|
||||||
(h).hdr_tag = ntohl((h).hdr_tag); \
|
(h).hdr_tag = ntohl((h).hdr_tag); \
|
||||||
(h).hdr_msg_length = ntoh64((h).hdr_msg_length); \
|
(h).hdr_seq = ntohs((h).hdr_seq); \
|
||||||
(h).hdr_msg_seq = ntohs((h).hdr_msg_seq); \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define MCA_PML_OB1_MATCH_HDR_HTON(h) \
|
#define MCA_PML_OB1_MATCH_HDR_HTON(h) \
|
||||||
do { \
|
do { \
|
||||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||||
(h).hdr_contextid = htons((h).hdr_contextid); \
|
(h).hdr_ctx = htons((h).hdr_ctx); \
|
||||||
(h).hdr_src = htonl((h).hdr_src); \
|
(h).hdr_src = htonl((h).hdr_src); \
|
||||||
(h).hdr_dst = htonl((h).hdr_dst); \
|
(h).hdr_dst = htonl((h).hdr_dst); \
|
||||||
(h).hdr_tag = htonl((h).hdr_tag); \
|
(h).hdr_tag = htonl((h).hdr_tag); \
|
||||||
(h).hdr_msg_length = hton64((h).hdr_msg_length); \
|
(h).hdr_seq = htons((h).hdr_seq); \
|
||||||
(h).hdr_msg_seq = htons((h).hdr_msg_seq); \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -140,7 +136,7 @@ typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
|
|||||||
*/
|
*/
|
||||||
struct mca_pml_ob1_rendezvous_hdr_t {
|
struct mca_pml_ob1_rendezvous_hdr_t {
|
||||||
mca_pml_ob1_match_hdr_t hdr_match;
|
mca_pml_ob1_match_hdr_t hdr_match;
|
||||||
uint64_t hdr_frag_length; /**< fragment length */
|
uint64_t hdr_msg_length; /**< message length */
|
||||||
ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */
|
ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */
|
||||||
};
|
};
|
||||||
typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
||||||
@ -148,21 +144,31 @@ typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
|
|||||||
#define MCA_PML_OB1_RNDV_HDR_NTOH(h) \
|
#define MCA_PML_OB1_RNDV_HDR_NTOH(h) \
|
||||||
do { \
|
do { \
|
||||||
MCA_PML_OB1_MATCH_HDR_NTOH((h).hdr_match); \
|
MCA_PML_OB1_MATCH_HDR_NTOH((h).hdr_match); \
|
||||||
(h).hdr_frag_length = ntoh64((h).hdr_frag_length); \
|
(h).hdr_msg_length = ntoh64((h).hdr_msg_length); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define MCA_PML_OB1_RNDV_HDR_HTON(h) \
|
#define MCA_PML_OB1_RNDV_HDR_HTON(h) \
|
||||||
do { \
|
do { \
|
||||||
MCA_PML_OB1_MATCH_HDR_HTON((h).hdr_match); \
|
MCA_PML_OB1_MATCH_HDR_HTON((h).hdr_match); \
|
||||||
(h).hdr_frag_length = hton64((h).hdr_frag_length); \
|
(h).hdr_msg_length = hton64((h).hdr_msg_length); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Header definition for a combined rdma rendezvous/get
|
||||||
|
*/
|
||||||
|
struct mca_pml_ob1_rget_hdr_t {
|
||||||
|
mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
|
||||||
|
ompi_ptr_t hdr_des; /**< source descriptor */
|
||||||
|
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||||
|
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
||||||
|
};
|
||||||
|
typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Header for subsequent fragments.
|
* Header for subsequent fragments.
|
||||||
*/
|
*/
|
||||||
struct mca_pml_ob1_frag_hdr_t {
|
struct mca_pml_ob1_frag_hdr_t {
|
||||||
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
||||||
uint64_t hdr_frag_length; /**< fragment length */
|
|
||||||
uint64_t hdr_frag_offset; /**< offset into message */
|
uint64_t hdr_frag_offset; /**< offset into message */
|
||||||
ompi_ptr_t hdr_src_req; /**< pointer to source request */
|
ompi_ptr_t hdr_src_req; /**< pointer to source request */
|
||||||
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
|
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
|
||||||
@ -172,14 +178,12 @@ typedef struct mca_pml_ob1_frag_hdr_t mca_pml_ob1_frag_hdr_t;
|
|||||||
#define MCA_PML_OB1_FRAG_HDR_NTOH(h) \
|
#define MCA_PML_OB1_FRAG_HDR_NTOH(h) \
|
||||||
do { \
|
do { \
|
||||||
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
|
||||||
(h).hdr_frag_length = ntoh64((h).hdr_frag_length); \
|
|
||||||
(h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \
|
(h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define MCA_PML_OB1_FRAG_HDR_HTON(h) \
|
#define MCA_PML_OB1_FRAG_HDR_HTON(h) \
|
||||||
do { \
|
do { \
|
||||||
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
|
||||||
(h).hdr_frag_length = hton64((h).hdr_frag_length); \
|
|
||||||
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
|
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
@ -214,8 +218,8 @@ typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;
|
|||||||
|
|
||||||
struct mca_pml_ob1_rdma_hdr_t {
|
struct mca_pml_ob1_rdma_hdr_t {
|
||||||
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
||||||
ompi_ptr_t hdr_src; /**< source request/descriptor */
|
ompi_ptr_t hdr_req; /**< destination request */
|
||||||
ompi_ptr_t hdr_dst; /**< receive request/descriptor */
|
ompi_ptr_t hdr_des; /**< source descriptor */
|
||||||
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
uint64_t hdr_rdma_offset; /**< current offset into user buffer */
|
||||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||||
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
|
||||||
@ -228,10 +232,7 @@ typedef struct mca_pml_ob1_rdma_hdr_t mca_pml_ob1_rdma_hdr_t;
|
|||||||
|
|
||||||
struct mca_pml_ob1_fin_hdr_t {
|
struct mca_pml_ob1_fin_hdr_t {
|
||||||
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
|
||||||
ompi_ptr_t hdr_src; /**< source request/descriptor */
|
ompi_ptr_t hdr_des; /**< completed descriptor */
|
||||||
ompi_ptr_t hdr_dst; /**< receive request/descriptor */
|
|
||||||
uint64_t hdr_rdma_offset; /**< data offset */
|
|
||||||
uint64_t hdr_rdma_length; /**< number of segments for rdma */
|
|
||||||
};
|
};
|
||||||
typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t;
|
typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t;
|
||||||
|
|
||||||
@ -242,6 +243,7 @@ union mca_pml_ob1_hdr_t {
|
|||||||
mca_pml_ob1_common_hdr_t hdr_common;
|
mca_pml_ob1_common_hdr_t hdr_common;
|
||||||
mca_pml_ob1_match_hdr_t hdr_match;
|
mca_pml_ob1_match_hdr_t hdr_match;
|
||||||
mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
|
mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
|
||||||
|
mca_pml_ob1_rget_hdr_t hdr_rget;
|
||||||
mca_pml_ob1_frag_hdr_t hdr_frag;
|
mca_pml_ob1_frag_hdr_t hdr_frag;
|
||||||
mca_pml_ob1_ack_hdr_t hdr_ack;
|
mca_pml_ob1_ack_hdr_t hdr_ack;
|
||||||
mca_pml_ob1_rdma_hdr_t hdr_rdma;
|
mca_pml_ob1_rdma_hdr_t hdr_rdma;
|
||||||
|
@ -27,6 +27,7 @@ typedef enum {
|
|||||||
MCA_PML_OB1_RMDA_INIT,
|
MCA_PML_OB1_RMDA_INIT,
|
||||||
MCA_PML_OB1_RDMA_PREPARE,
|
MCA_PML_OB1_RDMA_PREPARE,
|
||||||
MCA_PML_OB1_RDMA_PUT,
|
MCA_PML_OB1_RDMA_PUT,
|
||||||
|
MCA_PML_OB1_RDMA_GET,
|
||||||
MCA_PML_OB1_RDMA_FIN
|
MCA_PML_OB1_RDMA_FIN
|
||||||
} mca_pml_ob1_rdma_state_t;
|
} mca_pml_ob1_rdma_state_t;
|
||||||
|
|
||||||
@ -37,7 +38,7 @@ struct mca_pml_ob1_rdma_frag_t {
|
|||||||
mca_pml_ob1_rdma_state_t rdma_state;
|
mca_pml_ob1_rdma_state_t rdma_state;
|
||||||
size_t rdma_length;
|
size_t rdma_length;
|
||||||
mca_btl_base_segment_t rdma_segs[MCA_BTL_DES_MAX_SEGMENTS];
|
mca_btl_base_segment_t rdma_segs[MCA_BTL_DES_MAX_SEGMENTS];
|
||||||
struct mca_pml_ob1_send_request_t* rdma_req;
|
void *rdma_req;
|
||||||
struct mca_bml_base_endpoint_t* rdma_ep;
|
struct mca_bml_base_endpoint_t* rdma_ep;
|
||||||
};
|
};
|
||||||
typedef struct mca_pml_ob1_rdma_frag_t mca_pml_ob1_rdma_frag_t;
|
typedef struct mca_pml_ob1_rdma_frag_t mca_pml_ob1_rdma_frag_t;
|
||||||
|
@ -69,6 +69,7 @@ void mca_pml_ob1_recv_frag_callback(
|
|||||||
switch(hdr->hdr_common.hdr_type) {
|
switch(hdr->hdr_common.hdr_type) {
|
||||||
case MCA_PML_OB1_HDR_TYPE_MATCH:
|
case MCA_PML_OB1_HDR_TYPE_MATCH:
|
||||||
case MCA_PML_OB1_HDR_TYPE_RNDV:
|
case MCA_PML_OB1_HDR_TYPE_RNDV:
|
||||||
|
case MCA_PML_OB1_HDR_TYPE_RGET:
|
||||||
{
|
{
|
||||||
mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,des->des_dst_cnt);
|
mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,des->des_dst_cnt);
|
||||||
break;
|
break;
|
||||||
@ -79,9 +80,6 @@ void mca_pml_ob1_recv_frag_callback(
|
|||||||
hdr->hdr_ack.hdr_src_req.pval;
|
hdr->hdr_ack.hdr_src_req.pval;
|
||||||
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
|
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
|
||||||
sendreq->req_rdma_offset = hdr->hdr_ack.hdr_rdma_offset;
|
sendreq->req_rdma_offset = hdr->hdr_ack.hdr_rdma_offset;
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
sendreq->t_send1 = get_profiler_timestamp();
|
|
||||||
#endif
|
|
||||||
MCA_PML_OB1_SEND_REQUEST_ADVANCE(sendreq);
|
MCA_PML_OB1_SEND_REQUEST_ADVANCE(sendreq);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -89,32 +87,21 @@ void mca_pml_ob1_recv_frag_callback(
|
|||||||
{
|
{
|
||||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)
|
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)
|
||||||
hdr->hdr_frag.hdr_dst_req.pval;
|
hdr->hdr_frag.hdr_dst_req.pval;
|
||||||
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt);
|
mca_pml_ob1_recv_request_progress(recvreq,btl,segments,des->des_dst_cnt);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MCA_PML_OB1_HDR_TYPE_PUT:
|
case MCA_PML_OB1_HDR_TYPE_PUT:
|
||||||
{
|
{
|
||||||
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)
|
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)
|
||||||
hdr->hdr_rdma.hdr_src.pval;
|
hdr->hdr_rdma.hdr_req.pval;
|
||||||
mca_pml_ob1_send_request_put(sendreq,btl,&hdr->hdr_rdma);
|
mca_pml_ob1_send_request_put(sendreq,btl,&hdr->hdr_rdma);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case MCA_PML_OB1_HDR_TYPE_FIN:
|
case MCA_PML_OB1_HDR_TYPE_FIN:
|
||||||
{
|
{
|
||||||
mca_btl_base_descriptor_t* dst = (mca_btl_base_descriptor_t*)
|
mca_btl_base_descriptor_t* rdma = (mca_btl_base_descriptor_t*)
|
||||||
hdr->hdr_fin.hdr_dst.pval;
|
hdr->hdr_fin.hdr_des.pval;
|
||||||
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)dst->des_cbdata;
|
rdma->des_cbfunc(btl, NULL, rdma, OMPI_SUCCESS);
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
recvreq->fin1[recvreq->fin_index] = get_profiler_timestamp();
|
|
||||||
btl->btl_free(btl,dst);
|
|
||||||
recvreq->fin2[recvreq->fin_index] = get_profiler_timestamp();
|
|
||||||
recvreq->fin_index++;
|
|
||||||
|
|
||||||
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt);
|
|
||||||
#else
|
|
||||||
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt);
|
|
||||||
btl->btl_free(btl,dst);
|
|
||||||
#endif
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -426,11 +413,11 @@ int mca_pml_ob1_recv_frag_match(
|
|||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
/* communicator pointer */
|
/* communicator pointer */
|
||||||
comm_ptr=ompi_comm_lookup(hdr->hdr_contextid);
|
comm_ptr=ompi_comm_lookup(hdr->hdr_ctx);
|
||||||
comm=(mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;
|
comm=(mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;
|
||||||
|
|
||||||
/* source sequence number */
|
/* source sequence number */
|
||||||
frag_msg_seq = hdr->hdr_msg_seq;
|
frag_msg_seq = hdr->hdr_seq;
|
||||||
proc = comm->procs + hdr->hdr_src;
|
proc = comm->procs + hdr->hdr_src;
|
||||||
|
|
||||||
/* get next expected message sequence number - if threaded
|
/* get next expected message sequence number - if threaded
|
||||||
@ -497,7 +484,7 @@ int mca_pml_ob1_recv_frag_match(
|
|||||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments);
|
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
|
||||||
opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
|
opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -522,7 +509,7 @@ int mca_pml_ob1_recv_frag_match(
|
|||||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments);
|
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
|
||||||
opal_list_append(&proc->frags_cant_match, (opal_list_item_t *)frag);
|
opal_list_append(&proc->frags_cant_match, (opal_list_item_t *)frag);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -531,15 +518,13 @@ int mca_pml_ob1_recv_frag_match(
|
|||||||
|
|
||||||
/* release matching lock before processing fragment */
|
/* release matching lock before processing fragment */
|
||||||
if(match != NULL) {
|
if(match != NULL) {
|
||||||
MCA_PML_OB1_RECV_REQUEST_MATCHED(match, hdr);
|
mca_pml_ob1_recv_request_progress(match,btl,segments,num_segments);
|
||||||
mca_pml_ob1_recv_request_progress(match,segments,num_segments);
|
|
||||||
}
|
}
|
||||||
if(additional_match) {
|
if(additional_match) {
|
||||||
opal_list_item_t* item;
|
opal_list_item_t* item;
|
||||||
while(NULL != (item = opal_list_remove_first(&additional_matches))) {
|
while(NULL != (item = opal_list_remove_first(&additional_matches))) {
|
||||||
mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
|
mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
|
||||||
MCA_PML_OB1_RECV_REQUEST_MATCHED(frag->request, hdr);
|
mca_pml_ob1_recv_request_progress(frag->request,frag->btl,frag->segments,frag->num_segments);
|
||||||
mca_pml_ob1_recv_request_progress(frag->request,frag->segments,frag->num_segments);
|
|
||||||
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
|
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -601,7 +586,7 @@ static bool mca_pml_ob1_check_cantmatch_for_match(
|
|||||||
/*
|
/*
|
||||||
* If the message has the next expected seq from that proc...
|
* If the message has the next expected seq from that proc...
|
||||||
*/
|
*/
|
||||||
frag_seq=frag->hdr.hdr_match.hdr_msg_seq;
|
frag_seq=frag->hdr.hdr_match.hdr_seq;
|
||||||
if (frag_seq == next_msg_seq_expected) {
|
if (frag_seq == next_msg_seq_expected) {
|
||||||
mca_pml_ob1_match_hdr_t* hdr = &frag->hdr.hdr_match;
|
mca_pml_ob1_match_hdr_t* hdr = &frag->hdr.hdr_match;
|
||||||
|
|
||||||
|
@ -38,6 +38,7 @@ struct mca_pml_ob1_recv_frag_t {
|
|||||||
mca_pml_ob1_hdr_t hdr;
|
mca_pml_ob1_hdr_t hdr;
|
||||||
struct mca_pml_ob1_recv_request_t* request;
|
struct mca_pml_ob1_recv_request_t* request;
|
||||||
size_t num_segments;
|
size_t num_segments;
|
||||||
|
mca_btl_base_module_t* btl;
|
||||||
mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS];
|
mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS];
|
||||||
mca_pml_ob1_buffer_t* buffers[MCA_BTL_DES_MAX_SEGMENTS];
|
mca_pml_ob1_buffer_t* buffers[MCA_BTL_DES_MAX_SEGMENTS];
|
||||||
};
|
};
|
||||||
@ -54,13 +55,14 @@ do { \
|
|||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
|
||||||
#define MCA_PML_OB1_RECV_FRAG_INIT(frag, hdr,segs,cnt) \
|
#define MCA_PML_OB1_RECV_FRAG_INIT(frag, hdr,segs,cnt,btl) \
|
||||||
do { \
|
do { \
|
||||||
size_t i; \
|
size_t i; \
|
||||||
mca_btl_base_segment_t* macro_segments = frag->segments; \
|
mca_btl_base_segment_t* macro_segments = frag->segments; \
|
||||||
mca_pml_ob1_buffer_t** buffers = frag->buffers; \
|
mca_pml_ob1_buffer_t** buffers = frag->buffers; \
|
||||||
\
|
\
|
||||||
/* init recv_frag */ \
|
/* init recv_frag */ \
|
||||||
|
frag->btl = btl; \
|
||||||
frag->hdr = *(mca_pml_ob1_hdr_t*)hdr; \
|
frag->hdr = *(mca_pml_ob1_hdr_t*)hdr; \
|
||||||
frag->num_segments = cnt; \
|
frag->num_segments = cnt; \
|
||||||
/* copy over data */ \
|
/* copy over data */ \
|
||||||
|
@ -24,7 +24,9 @@
|
|||||||
#include "pml_ob1_recvreq.h"
|
#include "pml_ob1_recvreq.h"
|
||||||
#include "pml_ob1_recvfrag.h"
|
#include "pml_ob1_recvfrag.h"
|
||||||
#include "pml_ob1_sendreq.h"
|
#include "pml_ob1_sendreq.h"
|
||||||
|
#include "pml_ob1_rdmafrag.h"
|
||||||
#include "mca/bml/base/base.h"
|
#include "mca/bml/base/base.h"
|
||||||
|
#include "mca/errmgr/errmgr.h"
|
||||||
|
|
||||||
static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
|
static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
|
||||||
mca_pml_ob1_recv_request_t* request, mca_pml_ob1_comm_proc_t* proc);
|
mca_pml_ob1_recv_request_t* request, mca_pml_ob1_comm_proc_t* proc);
|
||||||
@ -102,16 +104,31 @@ OBJ_CLASS_INSTANCE(
|
|||||||
* Release resources.
|
* Release resources.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void mca_pml_ob1_send_ctl_complete(
|
static void mca_pml_ob1_ctl_completion(
|
||||||
mca_btl_base_module_t* btl,
|
mca_btl_base_module_t* btl,
|
||||||
struct mca_btl_base_endpoint_t* ep,
|
struct mca_btl_base_endpoint_t* ep,
|
||||||
struct mca_btl_base_descriptor_t* des,
|
struct mca_btl_base_descriptor_t* des,
|
||||||
int status)
|
int status)
|
||||||
{
|
{
|
||||||
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||||
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
|
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Put operation has completed remotely - update request status
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_pml_ob1_put_completion(
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* ep,
|
||||||
|
struct mca_btl_base_descriptor_t* des,
|
||||||
|
int status)
|
||||||
|
{
|
||||||
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||||
|
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_cbdata;
|
||||||
|
mca_pml_ob1_recv_request_progress(recvreq,btl,des->des_dst,des->des_dst_cnt);
|
||||||
|
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*
|
*
|
||||||
@ -143,7 +160,7 @@ static void mca_pml_ob1_recv_request_ack(
|
|||||||
* registered. if registered on both sides - do one rdma for
|
* registered. if registered on both sides - do one rdma for
|
||||||
* the entire message.
|
* the entire message.
|
||||||
*/
|
*/
|
||||||
if(hdr->hdr_match.hdr_msg_length > 0) {
|
if(hdr->hdr_msg_length > 0) {
|
||||||
|
|
||||||
recvreq->req_chunk = mca_mpool_base_find(recvreq->req_recv.req_base.req_addr);
|
recvreq->req_chunk = mca_mpool_base_find(recvreq->req_recv.req_base.req_addr);
|
||||||
if( NULL != recvreq->req_chunk &&
|
if( NULL != recvreq->req_chunk &&
|
||||||
@ -170,8 +187,8 @@ static void mca_pml_ob1_recv_request_ack(
|
|||||||
|
|
||||||
/* use convertor to figure out the rdma offset for this request */
|
/* use convertor to figure out the rdma offset for this request */
|
||||||
recvreq->req_rdma_offset = bml_endpoint->btl_rdma_offset;
|
recvreq->req_rdma_offset = bml_endpoint->btl_rdma_offset;
|
||||||
if(recvreq->req_rdma_offset < hdr->hdr_frag_length) {
|
if(recvreq->req_rdma_offset < recvreq->req_bytes_received) {
|
||||||
recvreq->req_rdma_offset = hdr->hdr_frag_length;
|
recvreq->req_rdma_offset = recvreq->req_bytes_received;
|
||||||
}
|
}
|
||||||
ompi_convertor_set_position(
|
ompi_convertor_set_position(
|
||||||
&recvreq->req_recv.req_convertor,
|
&recvreq->req_recv.req_convertor,
|
||||||
@ -182,7 +199,7 @@ static void mca_pml_ob1_recv_request_ack(
|
|||||||
|
|
||||||
/* start rdma at the current fragment offset - no need to send an ack in this case */
|
/* start rdma at the current fragment offset - no need to send an ack in this case */
|
||||||
} else {
|
} else {
|
||||||
recvreq->req_rdma_offset = hdr->hdr_frag_length;
|
recvreq->req_rdma_offset = recvreq->req_bytes_received;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,8 +224,7 @@ static void mca_pml_ob1_recv_request_ack(
|
|||||||
|
|
||||||
/* initialize descriptor */
|
/* initialize descriptor */
|
||||||
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||||
des->des_cbfunc = mca_pml_ob1_send_ctl_complete;
|
des->des_cbfunc = mca_pml_ob1_ctl_completion;
|
||||||
des->des_cbdata = bml_btl;
|
|
||||||
|
|
||||||
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
|
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
|
||||||
if(rc != OMPI_SUCCESS) {
|
if(rc != OMPI_SUCCESS) {
|
||||||
@ -226,6 +242,166 @@ retry:
|
|||||||
opal_list_append(&mca_pml_ob1.acks_pending, (opal_list_item_t*)frag);
|
opal_list_append(&mca_pml_ob1.acks_pending, (opal_list_item_t*)frag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return resources used by the RDMA
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_pml_ob1_fin_completion(
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* ep,
|
||||||
|
struct mca_btl_base_descriptor_t* des,
|
||||||
|
int status)
|
||||||
|
{
|
||||||
|
|
||||||
|
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
|
||||||
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
|
||||||
|
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
|
||||||
|
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_pml_ob1_rget_completion(
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* ep,
|
||||||
|
struct mca_btl_base_descriptor_t* des,
|
||||||
|
int status)
|
||||||
|
{
|
||||||
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
|
||||||
|
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
|
||||||
|
mca_pml_ob1_recv_request_t* recvreq = frag->rdma_req;
|
||||||
|
mca_pml_ob1_fin_hdr_t* hdr;
|
||||||
|
mca_btl_base_descriptor_t *fin;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* is receive request complete */
|
||||||
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||||
|
recvreq->req_bytes_received += frag->rdma_length;
|
||||||
|
recvreq->req_bytes_delivered += frag->rdma_length;
|
||||||
|
if(recvreq->req_bytes_received == recvreq->req_recv.req_bytes_packed) {
|
||||||
|
recvreq->req_recv.req_base.req_ompi.req_status._count = recvreq->req_bytes_delivered;
|
||||||
|
recvreq->req_recv.req_base.req_pml_complete = true;
|
||||||
|
recvreq->req_recv.req_base.req_ompi.req_complete = true;
|
||||||
|
if(ompi_request_waiting) {
|
||||||
|
opal_condition_broadcast(&ompi_request_cond);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
|
|
||||||
|
/* return descriptor */
|
||||||
|
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
|
||||||
|
|
||||||
|
/* queue up a fin control message to source */
|
||||||
|
MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, fin, sizeof(mca_pml_ob1_fin_hdr_t));
|
||||||
|
if(NULL == fin) {
|
||||||
|
opal_output(0, "[%s:%d] unable to allocate descriptor", __FILE__,__LINE__);
|
||||||
|
orte_errmgr.abort();
|
||||||
|
}
|
||||||
|
fin->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||||
|
fin->des_cbfunc = mca_pml_ob1_fin_completion;
|
||||||
|
fin->des_cbdata = frag;
|
||||||
|
|
||||||
|
/* fill in header */
|
||||||
|
hdr = (mca_pml_ob1_fin_hdr_t*)fin->des_src->seg_addr.pval;
|
||||||
|
hdr->hdr_common.hdr_flags = 0;
|
||||||
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN;
|
||||||
|
hdr->hdr_des = frag->rdma_hdr.hdr_rget.hdr_des;
|
||||||
|
|
||||||
|
/* queue request */
|
||||||
|
rc = mca_bml_base_send(
|
||||||
|
bml_btl,
|
||||||
|
fin,
|
||||||
|
MCA_BTL_TAG_PML
|
||||||
|
);
|
||||||
|
if(OMPI_SUCCESS != rc) {
|
||||||
|
opal_output(0, "[%s:%d] unable to queue fin", __FILE__,__LINE__);
|
||||||
|
orte_errmgr.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_pml_ob1_recv_request_rget(
|
||||||
|
mca_pml_ob1_recv_request_t* recvreq,
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
mca_pml_ob1_rget_hdr_t* hdr)
|
||||||
|
{
|
||||||
|
mca_bml_base_endpoint_t* bml_endpoint = NULL;
|
||||||
|
mca_bml_base_btl_t* bml_btl;
|
||||||
|
mca_pml_ob1_rdma_frag_t* frag;
|
||||||
|
mca_btl_base_descriptor_t* descriptor;
|
||||||
|
mca_mpool_base_registration_t* registration = NULL;
|
||||||
|
size_t i, size = 0;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* lookup bml datastructures */
|
||||||
|
bml_endpoint = (mca_bml_base_endpoint_t*) recvreq->req_proc->proc_pml;
|
||||||
|
bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl);
|
||||||
|
if(NULL == bml_btl) {
|
||||||
|
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
|
||||||
|
orte_errmgr.abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* allocate/initialize a fragment */
|
||||||
|
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag,rc);
|
||||||
|
for(i=0; i<hdr->hdr_seg_cnt; i++) {
|
||||||
|
size += hdr->hdr_segs[i].seg_len;
|
||||||
|
frag->rdma_segs[i] = hdr->hdr_segs[i];
|
||||||
|
}
|
||||||
|
frag->rdma_hdr.hdr_rget = *hdr;
|
||||||
|
frag->rdma_req = recvreq;
|
||||||
|
frag->rdma_ep = bml_endpoint;
|
||||||
|
frag->rdma_state = MCA_PML_OB1_RDMA_PREPARE;
|
||||||
|
|
||||||
|
/* is there an existing registration for this btl */
|
||||||
|
recvreq->req_chunk = mca_mpool_base_find(recvreq->req_recv.req_base.req_addr);
|
||||||
|
if( NULL != recvreq->req_chunk ) {
|
||||||
|
struct mca_mpool_base_reg_mpool_t *reg = recvreq->req_chunk->mpools;
|
||||||
|
while(reg->mpool != NULL) {
|
||||||
|
if(NULL != mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma,
|
||||||
|
(mca_btl_base_module_t*) reg->user_data)) {
|
||||||
|
recvreq->req_mpool = reg;
|
||||||
|
registration = reg->mpool_registration;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
reg++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* prepare descriptor */
|
||||||
|
mca_bml_base_prepare_dst(
|
||||||
|
bml_btl,
|
||||||
|
registration,
|
||||||
|
&recvreq->req_recv.req_convertor,
|
||||||
|
0,
|
||||||
|
&size,
|
||||||
|
&descriptor);
|
||||||
|
if(NULL == descriptor) {
|
||||||
|
opal_output(0, "[%s:%d] unable to allocate descriptor for rdma get", __FILE__, __LINE__);
|
||||||
|
orte_errmgr.abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
frag->rdma_length = size;
|
||||||
|
descriptor->des_src = frag->rdma_segs;
|
||||||
|
descriptor->des_src_cnt = hdr->hdr_seg_cnt;
|
||||||
|
descriptor->des_cbdata = frag;
|
||||||
|
descriptor->des_cbfunc = mca_pml_ob1_rget_completion;
|
||||||
|
|
||||||
|
/* queue up get request */
|
||||||
|
rc = mca_bml_base_get(bml_btl,descriptor);
|
||||||
|
if(rc != OMPI_SUCCESS) {
|
||||||
|
opal_output(0, "[%s:%d] rdma get failed with error %d", __FILE__, __LINE__, rc);
|
||||||
|
orte_errmgr.abort();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Update the recv request status to reflect the number of bytes
|
* Update the recv request status to reflect the number of bytes
|
||||||
@ -234,6 +410,7 @@ retry:
|
|||||||
|
|
||||||
void mca_pml_ob1_recv_request_progress(
|
void mca_pml_ob1_recv_request_progress(
|
||||||
mca_pml_ob1_recv_request_t* recvreq,
|
mca_pml_ob1_recv_request_t* recvreq,
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
mca_btl_base_segment_t* segments,
|
mca_btl_base_segment_t* segments,
|
||||||
size_t num_segments)
|
size_t num_segments)
|
||||||
{
|
{
|
||||||
@ -242,11 +419,17 @@ void mca_pml_ob1_recv_request_progress(
|
|||||||
size_t data_offset = 0;
|
size_t data_offset = 0;
|
||||||
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
|
||||||
bool schedule = false;
|
bool schedule = false;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
for(i=0; i<num_segments; i++)
|
||||||
|
bytes_received += segments[i].seg_len;
|
||||||
|
|
||||||
switch(hdr->hdr_common.hdr_type) {
|
switch(hdr->hdr_common.hdr_type) {
|
||||||
case MCA_PML_OB1_HDR_TYPE_MATCH:
|
case MCA_PML_OB1_HDR_TYPE_MATCH:
|
||||||
|
|
||||||
bytes_received = hdr->hdr_match.hdr_msg_length;
|
bytes_received -= sizeof(mca_pml_ob1_match_hdr_t);
|
||||||
|
recvreq->req_recv.req_bytes_packed = bytes_received;
|
||||||
|
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq,&hdr->hdr_match);
|
||||||
MCA_PML_OB1_RECV_REQUEST_UNPACK(
|
MCA_PML_OB1_RECV_REQUEST_UNPACK(
|
||||||
recvreq,
|
recvreq,
|
||||||
segments,
|
segments,
|
||||||
@ -259,12 +442,11 @@ void mca_pml_ob1_recv_request_progress(
|
|||||||
|
|
||||||
case MCA_PML_OB1_HDR_TYPE_RNDV:
|
case MCA_PML_OB1_HDR_TYPE_RNDV:
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
bytes_received -= sizeof(mca_pml_ob1_rendezvous_hdr_t);
|
||||||
recvreq->ack = get_profiler_timestamp();
|
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
|
||||||
#endif
|
|
||||||
recvreq->req_send = hdr->hdr_rndv.hdr_src_req;
|
recvreq->req_send = hdr->hdr_rndv.hdr_src_req;
|
||||||
|
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq,&hdr->hdr_match);
|
||||||
mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv);
|
mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv);
|
||||||
bytes_received = hdr->hdr_rndv.hdr_frag_length;
|
|
||||||
MCA_PML_OB1_RECV_REQUEST_UNPACK(
|
MCA_PML_OB1_RECV_REQUEST_UNPACK(
|
||||||
recvreq,
|
recvreq,
|
||||||
segments,
|
segments,
|
||||||
@ -275,9 +457,16 @@ void mca_pml_ob1_recv_request_progress(
|
|||||||
bytes_delivered);
|
bytes_delivered);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case MCA_PML_OB1_HDR_TYPE_RGET:
|
||||||
|
|
||||||
|
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
|
||||||
|
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq,&hdr->hdr_match);
|
||||||
|
mca_pml_ob1_recv_request_rget(recvreq, btl, &hdr->hdr_rget);
|
||||||
|
return;
|
||||||
|
|
||||||
case MCA_PML_OB1_HDR_TYPE_FRAG:
|
case MCA_PML_OB1_HDR_TYPE_FRAG:
|
||||||
|
|
||||||
bytes_received = hdr->hdr_frag.hdr_frag_length;
|
bytes_received -= sizeof(mca_pml_ob1_frag_hdr_t);
|
||||||
data_offset = hdr->hdr_frag.hdr_frag_offset;
|
data_offset = hdr->hdr_frag.hdr_frag_offset;
|
||||||
MCA_PML_OB1_RECV_REQUEST_UNPACK(
|
MCA_PML_OB1_RECV_REQUEST_UNPACK(
|
||||||
recvreq,
|
recvreq,
|
||||||
@ -291,7 +480,6 @@ void mca_pml_ob1_recv_request_progress(
|
|||||||
|
|
||||||
case MCA_PML_OB1_HDR_TYPE_FIN:
|
case MCA_PML_OB1_HDR_TYPE_FIN:
|
||||||
|
|
||||||
bytes_delivered = bytes_received = hdr->hdr_fin.hdr_rdma_length;
|
|
||||||
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
|
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -310,21 +498,6 @@ void mca_pml_ob1_recv_request_progress(
|
|||||||
recvreq->req_recv.req_base.req_pml_complete = true;
|
recvreq->req_recv.req_base.req_pml_complete = true;
|
||||||
recvreq->req_recv.req_base.req_ompi.req_complete = true;
|
recvreq->req_recv.req_base.req_ompi.req_complete = true;
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
if(recvreq->req_bytes_received > 0) {
|
|
||||||
int i;
|
|
||||||
opal_output(0, "[%d,%d,%d] dst ack: %llu",
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->ack);
|
|
||||||
for(i=0; i<recvreq->pin_index; i++) {
|
|
||||||
opal_output(0, "[%d,%d,%d] dst pin, %llu %llu",
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->pin1[i], recvreq->pin2[i] - recvreq->pin1[i]);
|
|
||||||
}
|
|
||||||
for(i=0; i<recvreq->fin_index; i++) {
|
|
||||||
opal_output(0, "[%d,%d,%d] dst fin: %llu %llu",
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->fin1[i], recvreq->fin2[i] - recvreq->fin1[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if(ompi_request_waiting) {
|
if(ompi_request_waiting) {
|
||||||
opal_condition_broadcast(&ompi_request_cond);
|
opal_condition_broadcast(&ompi_request_cond);
|
||||||
}
|
}
|
||||||
@ -409,9 +582,6 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
|||||||
|
|
||||||
/* prepare a descriptor for RDMA */
|
/* prepare a descriptor for RDMA */
|
||||||
ompi_convertor_set_position(&recvreq->req_recv.req_convertor, &recvreq->req_rdma_offset);
|
ompi_convertor_set_position(&recvreq->req_recv.req_convertor, &recvreq->req_rdma_offset);
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
recvreq->pin1[recvreq->pin_index] = get_profiler_timestamp();
|
|
||||||
#endif
|
|
||||||
mca_bml_base_prepare_dst(
|
mca_bml_base_prepare_dst(
|
||||||
bml_btl,
|
bml_btl,
|
||||||
reg,
|
reg,
|
||||||
@ -419,16 +589,13 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
|||||||
0,
|
0,
|
||||||
&size,
|
&size,
|
||||||
&dst);
|
&dst);
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
recvreq->pin2[recvreq->pin_index] = get_profiler_timestamp();
|
|
||||||
recvreq->pin_index++;
|
|
||||||
#endif
|
|
||||||
if(dst == NULL) {
|
if(dst == NULL) {
|
||||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||||
opal_list_append(&mca_pml_ob1.recv_pending, (opal_list_item_t*)recvreq);
|
opal_list_append(&mca_pml_ob1.recv_pending, (opal_list_item_t*)recvreq);
|
||||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
dst->des_cbfunc = mca_pml_ob1_put_completion;
|
||||||
dst->des_cbdata = recvreq;
|
dst->des_cbdata = recvreq;
|
||||||
|
|
||||||
/* prepare a descriptor for rdma control message */
|
/* prepare a descriptor for rdma control message */
|
||||||
@ -446,15 +613,14 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||||
ctl->des_cbfunc = mca_pml_ob1_send_ctl_complete;
|
ctl->des_cbfunc = mca_pml_ob1_ctl_completion;
|
||||||
ctl->des_cbdata = bml_btl;
|
|
||||||
|
|
||||||
/* fill in rdma header */
|
/* fill in rdma header */
|
||||||
hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
|
hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
|
||||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT;
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT;
|
||||||
hdr->hdr_common.hdr_flags = 0;
|
hdr->hdr_common.hdr_flags = 0;
|
||||||
hdr->hdr_src = recvreq->req_send;
|
hdr->hdr_req = recvreq->req_send;
|
||||||
hdr->hdr_dst.pval = dst;
|
hdr->hdr_des.pval = dst;
|
||||||
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
|
||||||
hdr->hdr_seg_cnt = dst->des_dst_cnt;
|
hdr->hdr_seg_cnt = dst->des_dst_cnt;
|
||||||
memcpy(hdr->hdr_segs, dst->des_dst, dst->des_dst_cnt * sizeof(mca_btl_base_segment_t));
|
memcpy(hdr->hdr_segs, dst->des_dst, dst->des_dst_cnt * sizeof(mca_btl_base_segment_t));
|
||||||
@ -506,7 +672,7 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
|
|||||||
(frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
|
(frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
|
||||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||||
|
|
||||||
mca_pml_ob1_recv_request_progress(request,frag->segments,frag->num_segments);
|
mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
|
||||||
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
||||||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
||||||
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
|
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
|
||||||
@ -560,7 +726,7 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
|
|||||||
if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
|
if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
|
||||||
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
OPAL_THREAD_UNLOCK(&comm->matching_lock);
|
||||||
|
|
||||||
mca_pml_ob1_recv_request_progress(request,frag->segments,frag->num_segments);
|
mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
|
||||||
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
||||||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
||||||
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
|
MCA_PML_OB1_RECV_FRAG_RETURN(frag);
|
||||||
@ -620,7 +786,6 @@ static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
|
|||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
find_fragment:
|
find_fragment:
|
||||||
MCA_PML_OB1_RECV_REQUEST_MATCHED(request, hdr);
|
|
||||||
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
|
||||||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
|
||||||
opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag);
|
opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag);
|
||||||
|
@ -28,12 +28,6 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct mca_pml_ob1_registration_t {
|
|
||||||
struct mca_pml_ob1_endpoint_t* endpoint;
|
|
||||||
struct mca_mpool_base_registration_t* registration;
|
|
||||||
};
|
|
||||||
typedef struct mca_pml_ob1_registration_t mca_pml_ob1_registration_t;
|
|
||||||
|
|
||||||
|
|
||||||
struct mca_pml_ob1_recv_request_t {
|
struct mca_pml_ob1_recv_request_t {
|
||||||
mca_pml_base_recv_request_t req_recv;
|
mca_pml_base_recv_request_t req_recv;
|
||||||
@ -46,18 +40,6 @@ struct mca_pml_ob1_recv_request_t {
|
|||||||
size_t req_bytes_received;
|
size_t req_bytes_received;
|
||||||
size_t req_bytes_delivered;
|
size_t req_bytes_delivered;
|
||||||
size_t req_rdma_offset;
|
size_t req_rdma_offset;
|
||||||
mca_pml_ob1_registration_t req_reg[MCA_MPOOL_BASE_MAX_REG];
|
|
||||||
size_t req_num_reg;
|
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
unsigned long long ack;
|
|
||||||
unsigned long long pin1[MCA_PML_OB1_NUM_TSTAMPS];
|
|
||||||
unsigned long long pin2[MCA_PML_OB1_NUM_TSTAMPS];
|
|
||||||
unsigned long long fin1[MCA_PML_OB1_NUM_TSTAMPS];
|
|
||||||
unsigned long long fin2[MCA_PML_OB1_NUM_TSTAMPS];
|
|
||||||
int pin_index;
|
|
||||||
int fin_index;
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
typedef struct mca_pml_ob1_recv_request_t mca_pml_ob1_recv_request_t;
|
typedef struct mca_pml_ob1_recv_request_t mca_pml_ob1_recv_request_t;
|
||||||
|
|
||||||
@ -154,13 +136,6 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
|
|||||||
/**
|
/**
|
||||||
* Initialize diagnostic code for tracing rdma protocol timing
|
* Initialize diagnostic code for tracing rdma protocol timing
|
||||||
*/
|
*/
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
#define MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(recvreq) \
|
|
||||||
(request)->fin_index = 0; \
|
|
||||||
(request)->pin_index = 0;
|
|
||||||
#else
|
|
||||||
#define MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(recvreq)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start an initialized request.
|
* Start an initialized request.
|
||||||
@ -180,7 +155,6 @@ do {
|
|||||||
(request)->req_recv.req_base.req_pml_complete = false; \
|
(request)->req_recv.req_base.req_pml_complete = false; \
|
||||||
(request)->req_recv.req_base.req_ompi.req_complete = false; \
|
(request)->req_recv.req_base.req_ompi.req_complete = false; \
|
||||||
(request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
(request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
||||||
MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(request); \
|
|
||||||
\
|
\
|
||||||
/* always set the req_status.MPI_TAG to ANY_TAG before starting the \
|
/* always set the req_status.MPI_TAG to ANY_TAG before starting the \
|
||||||
* request. This field is used if cancelled to find out if the request \
|
* request. This field is used if cancelled to find out if the request \
|
||||||
@ -207,10 +181,8 @@ do {
|
|||||||
request, \
|
request, \
|
||||||
hdr) \
|
hdr) \
|
||||||
do { \
|
do { \
|
||||||
(request)->req_recv.req_bytes_packed = (hdr)->hdr_msg_length; \
|
|
||||||
(request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag; \
|
(request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag; \
|
||||||
(request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = (hdr)->hdr_src; \
|
(request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = (hdr)->hdr_src; \
|
||||||
\
|
|
||||||
if((request)->req_recv.req_bytes_packed != 0) { \
|
if((request)->req_recv.req_bytes_packed != 0) { \
|
||||||
ompi_proc_t *proc = \
|
ompi_proc_t *proc = \
|
||||||
ompi_comm_peer_lookup( \
|
ompi_comm_peer_lookup( \
|
||||||
@ -280,6 +252,7 @@ do {
|
|||||||
|
|
||||||
void mca_pml_ob1_recv_request_progress(
|
void mca_pml_ob1_recv_request_progress(
|
||||||
mca_pml_ob1_recv_request_t* req,
|
mca_pml_ob1_recv_request_t* req,
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
mca_btl_base_segment_t* segments,
|
mca_btl_base_segment_t* segments,
|
||||||
size_t num_segments);
|
size_t num_segments);
|
||||||
|
|
||||||
|
@ -121,13 +121,9 @@ static void mca_pml_ob1_rndv_completion(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* count bytes of user data actually delivered */
|
/* count bytes of user data actually delivered */
|
||||||
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||||
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
||||||
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
if(sendreq->req_pipeline_depth == 1) {
|
|
||||||
sendreq->t_send2 = get_profiler_timestamp();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* return the descriptor */
|
/* return the descriptor */
|
||||||
mca_bml_base_free(bml_btl, descriptor);
|
mca_bml_base_free(bml_btl, descriptor);
|
||||||
@ -140,6 +136,46 @@ static void mca_pml_ob1_rndv_completion(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Completion of a get request.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_pml_ob1_rget_completion(
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* ep,
|
||||||
|
struct mca_btl_base_descriptor_t* des,
|
||||||
|
int status)
|
||||||
|
{
|
||||||
|
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
|
||||||
|
|
||||||
|
/* count bytes of user data actually delivered and check for request completion */
|
||||||
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||||
|
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,des,0);
|
||||||
|
if (sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed) {
|
||||||
|
MCA_PML_OB1_SEND_REQUEST_COMPLETE(sendreq);
|
||||||
|
}
|
||||||
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||||
|
|
||||||
|
/* release resources */
|
||||||
|
btl->btl_free(btl,des);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Completion of a control message - return resources.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_pml_ob1_ctl_completion(
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* ep,
|
||||||
|
struct mca_btl_base_descriptor_t* descriptor,
|
||||||
|
int status)
|
||||||
|
{
|
||||||
|
/* return the descriptor */
|
||||||
|
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
|
||||||
|
mca_bml_base_free(bml_btl, descriptor);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Completion of additional fragments of a large message - may need
|
* Completion of additional fragments of a large message - may need
|
||||||
* to schedule additional fragments.
|
* to schedule additional fragments.
|
||||||
@ -162,20 +198,11 @@ static void mca_pml_ob1_frag_completion(
|
|||||||
orte_errmgr.abort();
|
orte_errmgr.abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* count bytes of user data actually delivered */
|
|
||||||
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_ob1_frag_hdr_t));
|
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
if(sendreq->req_pipeline_depth == 1) {
|
|
||||||
sendreq->t_send2 = get_profiler_timestamp();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* return the descriptor */
|
|
||||||
mca_bml_base_free(bml_btl, descriptor);
|
|
||||||
|
|
||||||
/* check for request completion */
|
/* check for request completion */
|
||||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||||
|
|
||||||
|
/* count bytes of user data actually delivered */
|
||||||
|
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_ob1_frag_hdr_t));
|
||||||
if (OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-1) == 0 &&
|
if (OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-1) == 0 &&
|
||||||
sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed) {
|
sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed) {
|
||||||
MCA_PML_OB1_SEND_REQUEST_COMPLETE(sendreq);
|
MCA_PML_OB1_SEND_REQUEST_COMPLETE(sendreq);
|
||||||
@ -188,6 +215,9 @@ static void mca_pml_ob1_frag_completion(
|
|||||||
mca_pml_ob1_send_request_schedule(sendreq);
|
mca_pml_ob1_send_request_schedule(sendreq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* return the descriptor */
|
||||||
|
mca_bml_base_free(bml_btl, descriptor);
|
||||||
|
|
||||||
/* check for pending requests */
|
/* check for pending requests */
|
||||||
MCA_PML_OB1_SEND_REQUEST_PROCESS_PENDING();
|
MCA_PML_OB1_SEND_REQUEST_PROCESS_PENDING();
|
||||||
}
|
}
|
||||||
@ -250,12 +280,11 @@ int mca_pml_ob1_send_request_start_copy(
|
|||||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||||
hdr->hdr_common.hdr_flags = 0;
|
hdr->hdr_common.hdr_flags = 0;
|
||||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||||
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid;
|
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||||
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
||||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||||
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||||
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;
|
|
||||||
|
|
||||||
/* update lengths */
|
/* update lengths */
|
||||||
segment->seg_len = sizeof(mca_pml_ob1_match_hdr_t) + max_data;
|
segment->seg_len = sizeof(mca_pml_ob1_match_hdr_t) + max_data;
|
||||||
@ -270,78 +299,131 @@ int mca_pml_ob1_send_request_start_copy(
|
|||||||
|
|
||||||
/* rendezvous header is required */
|
/* rendezvous header is required */
|
||||||
} else {
|
} else {
|
||||||
int32_t free_after;
|
struct mca_mpool_base_registration_t* registration = NULL;
|
||||||
|
struct mca_bml_base_btl_t* reg_btl = NULL;
|
||||||
/* allocate space for hdr + first fragment */
|
bool do_rdma;
|
||||||
mca_bml_base_alloc(bml_btl, &descriptor, sizeof(mca_pml_ob1_rendezvous_hdr_t) + size);
|
|
||||||
if(NULL == descriptor) {
|
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
|
||||||
}
|
|
||||||
segment = descriptor->des_src;
|
|
||||||
|
|
||||||
/* check to see if memory is registered */
|
/* check to see if memory is registered */
|
||||||
sendreq->req_chunk = mca_mpool_base_find(sendreq->req_send.req_addr);
|
sendreq->req_chunk = mca_mpool_base_find(sendreq->req_send.req_addr);
|
||||||
|
if(NULL != sendreq->req_chunk) {
|
||||||
|
struct mca_mpool_base_reg_mpool_t *reg = sendreq->req_chunk->mpools;
|
||||||
|
while(reg->mpool != NULL) {
|
||||||
|
if(NULL != (reg_btl = mca_bml_base_btl_array_find(&sendreq->bml_endpoint->btl_rdma,
|
||||||
|
(mca_btl_base_module_t*) reg->user_data))) {
|
||||||
|
registration = reg->mpool_registration;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
reg++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if the data is already registed or leave_pinned is set - then we
|
||||||
|
* will attempt to do an rdma of the entire message.
|
||||||
|
*/
|
||||||
|
do_rdma = (reg_btl != NULL || mca_pml_ob1.leave_pinned) &&
|
||||||
|
ompi_convertor_need_buffers(&sendreq->req_send.req_convertor) == false;
|
||||||
|
if(do_rdma == false || (reg_btl != NULL && (reg_btl->btl_flags & MCA_BTL_FLAGS_GET) == 0)) {
|
||||||
|
int32_t free_after;
|
||||||
|
|
||||||
/* if the buffer is not pinned and leave pinned is false we eagerly send
|
/* allocate space for hdr + first fragment */
|
||||||
data to cover the cost of pinning the recv buffers on the peer */
|
mca_bml_base_alloc(bml_btl, &descriptor, sizeof(mca_pml_ob1_rendezvous_hdr_t) + size);
|
||||||
if(size && NULL == sendreq->req_chunk && !mca_pml_ob1.leave_pinned) {
|
if(NULL == descriptor) {
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
/* pack the data into the supplied buffer */
|
}
|
||||||
iov.iov_base = (void*)((unsigned char*)segment->seg_addr.pval +
|
segment = descriptor->des_src;
|
||||||
|
|
||||||
|
/* rdma put is supported rather than rdma get */
|
||||||
|
if(do_rdma) {
|
||||||
|
max_data = 0; /* dont eager send any data */
|
||||||
|
} else {
|
||||||
|
iov.iov_base = (void*)((unsigned char*)segment->seg_addr.pval +
|
||||||
sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
||||||
iov.iov_len = size;
|
iov.iov_len = size;
|
||||||
iov_count = 1;
|
iov_count = 1;
|
||||||
max_data = size;
|
max_data = size;
|
||||||
if((rc = ompi_convertor_pack(
|
if((rc = ompi_convertor_pack(
|
||||||
&sendreq->req_send.req_convertor,
|
&sendreq->req_send.req_convertor,
|
||||||
&iov,
|
&iov,
|
||||||
&iov_count,
|
&iov_count,
|
||||||
&max_data,
|
&max_data,
|
||||||
&free_after)) < 0) {
|
&free_after)) < 0) {
|
||||||
mca_bml_base_free(bml_btl , descriptor);
|
mca_bml_base_free(bml_btl , descriptor);
|
||||||
return rc;
|
return rc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if(max_data != size) {
|
|
||||||
opal_output(0, "[%s:%d] max_data (%lu) != size (%lu)\n", __FILE__,__LINE__,max_data,size);
|
/* build hdr */
|
||||||
|
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||||
|
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
|
||||||
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
|
||||||
|
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||||
|
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||||
|
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
||||||
|
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||||
|
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||||
|
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||||
|
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||||
|
|
||||||
|
/* update lengths with number of bytes actually packed */
|
||||||
|
segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data;
|
||||||
|
sendreq->req_send_offset = max_data;
|
||||||
|
|
||||||
|
/* first fragment of a long message */
|
||||||
|
descriptor->des_cbfunc = mca_pml_ob1_rndv_completion;
|
||||||
|
|
||||||
|
/* the buffer is already pinned */
|
||||||
|
} else {
|
||||||
|
mca_btl_base_descriptor_t* src;
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
/* prepare descriptor */
|
||||||
|
if(reg_btl != NULL) {
|
||||||
|
bml_btl = reg_btl;
|
||||||
}
|
}
|
||||||
}
|
mca_bml_base_prepare_src(
|
||||||
/* if the buffer is pinned or leave pinned is true we do not eagerly send
|
bml_btl,
|
||||||
any data */
|
registration,
|
||||||
else {
|
&sendreq->req_send.req_convertor,
|
||||||
max_data = 0;
|
0,
|
||||||
}
|
&sendreq->req_send.req_bytes_packed,
|
||||||
/* build hdr */
|
&src);
|
||||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
if(NULL == src) {
|
||||||
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
|
}
|
||||||
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid;
|
src->des_cbfunc = mca_pml_ob1_rget_completion;
|
||||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
src->des_cbdata = sendreq;
|
||||||
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
|
||||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
|
||||||
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
|
||||||
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;
|
|
||||||
hdr->hdr_rndv.hdr_src_req.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
|
|
||||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
|
||||||
hdr->hdr_rndv.hdr_frag_length = max_data;
|
|
||||||
|
|
||||||
/* update lengths with number of bytes actually packed */
|
/* allocate space for hdr + segment list */
|
||||||
segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data;
|
mca_bml_base_alloc(bml_btl, &descriptor,
|
||||||
sendreq->req_send_offset = max_data;
|
sizeof(mca_pml_ob1_rget_hdr_t) + (sizeof(mca_btl_base_segment_t)*(src->des_src_cnt-1)));
|
||||||
|
if(NULL == descriptor) {
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
segment = descriptor->des_src;
|
||||||
|
|
||||||
/* first fragment of a long message */
|
/* build match header */
|
||||||
descriptor->des_cbfunc = mca_pml_ob1_rndv_completion;
|
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||||
|
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
|
||||||
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
|
||||||
|
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||||
|
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||||
|
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
||||||
|
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||||
|
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||||
|
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||||
|
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||||
|
hdr->hdr_rget.hdr_des.pval = src;
|
||||||
|
hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
|
||||||
|
for(i=0; i<src->des_src_cnt; i++)
|
||||||
|
hdr->hdr_rget.hdr_segs[i] = src->des_src[i];
|
||||||
|
descriptor->des_cbfunc = mca_pml_ob1_ctl_completion;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||||
descriptor->des_cbdata = sendreq;
|
descriptor->des_cbdata = sendreq;
|
||||||
|
|
||||||
/* send */
|
/* send */
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML);
|
||||||
sendreq->t_start = get_profiler_timestamp();
|
|
||||||
#endif
|
|
||||||
rc = mca_bml_base_send(bml_btl,
|
|
||||||
descriptor,
|
|
||||||
MCA_BTL_TAG_PML);
|
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
mca_bml_base_free(bml_btl, descriptor );
|
mca_bml_base_free(bml_btl, descriptor );
|
||||||
}
|
}
|
||||||
@ -392,12 +474,11 @@ int mca_pml_ob1_send_request_start_prepare(
|
|||||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||||
hdr->hdr_common.hdr_flags = 0;
|
hdr->hdr_common.hdr_flags = 0;
|
||||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||||
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid;
|
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||||
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
||||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||||
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||||
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;
|
|
||||||
|
|
||||||
/* short message */
|
/* short message */
|
||||||
descriptor->des_cbfunc = mca_pml_ob1_match_completion;
|
descriptor->des_cbfunc = mca_pml_ob1_match_completion;
|
||||||
@ -446,15 +527,13 @@ int mca_pml_ob1_send_request_start_prepare(
|
|||||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||||
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
|
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
|
||||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
|
||||||
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid;
|
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
|
||||||
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
|
||||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||||
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||||
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;
|
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||||
hdr->hdr_rndv.hdr_src_req.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
|
|
||||||
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
|
||||||
hdr->hdr_rndv.hdr_frag_length = size;
|
|
||||||
|
|
||||||
/* first fragment of a long message */
|
/* first fragment of a long message */
|
||||||
descriptor->des_cbfunc = mca_pml_ob1_rndv_completion;
|
descriptor->des_cbfunc = mca_pml_ob1_rndv_completion;
|
||||||
@ -464,12 +543,10 @@ int mca_pml_ob1_send_request_start_prepare(
|
|||||||
descriptor->des_cbdata = sendreq;
|
descriptor->des_cbdata = sendreq;
|
||||||
|
|
||||||
/* send */
|
/* send */
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
rc = mca_bml_base_send(
|
||||||
sendreq->t_start = get_profiler_timestamp();
|
bml_btl,
|
||||||
#endif
|
descriptor,
|
||||||
rc = mca_bml_base_send(bml_btl,
|
MCA_BTL_TAG_PML);
|
||||||
descriptor,
|
|
||||||
MCA_BTL_TAG_PML);
|
|
||||||
if(OMPI_SUCCESS != rc) {
|
if(OMPI_SUCCESS != rc) {
|
||||||
mca_bml_base_free(bml_btl, descriptor );
|
mca_bml_base_free(bml_btl, descriptor );
|
||||||
}
|
}
|
||||||
@ -552,7 +629,6 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
|
|||||||
hdr = (mca_pml_ob1_frag_hdr_t*)des->des_src->seg_addr.pval;
|
hdr = (mca_pml_ob1_frag_hdr_t*)des->des_src->seg_addr.pval;
|
||||||
hdr->hdr_common.hdr_flags = 0;
|
hdr->hdr_common.hdr_flags = 0;
|
||||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FRAG;
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FRAG;
|
||||||
hdr->hdr_frag_length = size;
|
|
||||||
hdr->hdr_frag_offset = sendreq->req_send_offset;
|
hdr->hdr_frag_offset = sendreq->req_send_offset;
|
||||||
hdr->hdr_src_req.pval = sendreq;
|
hdr->hdr_src_req.pval = sendreq;
|
||||||
hdr->hdr_dst_req = sendreq->req_recv;
|
hdr->hdr_dst_req = sendreq->req_recv;
|
||||||
@ -575,10 +651,6 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
|
|||||||
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
if(bytes_remaining == 0)
|
|
||||||
sendreq->t_scheduled = get_profiler_timestamp();
|
|
||||||
#endif
|
|
||||||
mca_pml_ob1_progress();
|
mca_pml_ob1_progress();
|
||||||
}
|
}
|
||||||
} while (OPAL_THREAD_ADD32(&sendreq->req_lock,-1) > 0);
|
} while (OPAL_THREAD_ADD32(&sendreq->req_lock,-1) > 0);
|
||||||
@ -631,13 +703,6 @@ static void mca_pml_ob1_put_completion(
|
|||||||
orte_errmgr.abort();
|
orte_errmgr.abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
/* update statistics */
|
|
||||||
sendreq->t_fin[sendreq->t_fin_index++] = get_profiler_timestamp();
|
|
||||||
if(sendreq->t_fin_index >= MCA_PML_OB1_NUM_TSTAMPS)
|
|
||||||
sendreq->t_fin_index = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* check for request completion */
|
/* check for request completion */
|
||||||
OPAL_THREAD_LOCK(&ompi_request_lock);
|
OPAL_THREAD_LOCK(&ompi_request_lock);
|
||||||
sendreq->req_bytes_delivered += frag->rdma_length;
|
sendreq->req_bytes_delivered += frag->rdma_length;
|
||||||
@ -667,10 +732,7 @@ static void mca_pml_ob1_put_completion(
|
|||||||
hdr = (mca_pml_ob1_fin_hdr_t*)fin->des_src->seg_addr.pval;
|
hdr = (mca_pml_ob1_fin_hdr_t*)fin->des_src->seg_addr.pval;
|
||||||
hdr->hdr_common.hdr_flags = 0;
|
hdr->hdr_common.hdr_flags = 0;
|
||||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN;
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN;
|
||||||
hdr->hdr_src = frag->rdma_hdr.hdr_rdma.hdr_src;
|
hdr->hdr_des = frag->rdma_hdr.hdr_rdma.hdr_des;
|
||||||
hdr->hdr_dst = frag->rdma_hdr.hdr_rdma.hdr_dst;
|
|
||||||
hdr->hdr_rdma_offset = frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
|
|
||||||
hdr->hdr_rdma_length = frag->rdma_length;
|
|
||||||
|
|
||||||
/* queue request */
|
/* queue request */
|
||||||
rc = mca_bml_base_send(
|
rc = mca_bml_base_send(
|
||||||
@ -752,12 +814,6 @@ void mca_pml_ob1_send_request_put(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
sendreq->t_pin[sendreq->t_pin_index++] = get_profiler_timestamp();
|
|
||||||
if(sendreq->t_pin_index >= MCA_PML_OB1_NUM_TSTAMPS)
|
|
||||||
sendreq->t_pin_index = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* setup descriptor */
|
/* setup descriptor */
|
||||||
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
|
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
|
||||||
|
|
||||||
@ -782,13 +838,6 @@ void mca_pml_ob1_send_request_put(
|
|||||||
des->des_cbfunc = mca_pml_ob1_put_completion;
|
des->des_cbfunc = mca_pml_ob1_put_completion;
|
||||||
des->des_cbdata = frag;
|
des->des_cbdata = frag;
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
/* queue put */
|
|
||||||
sendreq->t_put[sendreq->t_put_index++] = get_profiler_timestamp();
|
|
||||||
if(sendreq->t_put_index >= MCA_PML_OB1_NUM_TSTAMPS)
|
|
||||||
sendreq->t_put_index = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if(OMPI_SUCCESS != (rc = mca_bml_base_put(bml_btl, des))) {
|
if(OMPI_SUCCESS != (rc = mca_bml_base_put(bml_btl, des))) {
|
||||||
if(rc == OMPI_ERR_OUT_OF_RESOURCE) {
|
if(rc == OMPI_ERR_OUT_OF_RESOURCE) {
|
||||||
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
|
||||||
|
@ -45,19 +45,6 @@ struct mca_pml_ob1_send_request_t {
|
|||||||
size_t req_bytes_delivered;
|
size_t req_bytes_delivered;
|
||||||
size_t req_send_offset;
|
size_t req_send_offset;
|
||||||
size_t req_rdma_offset;
|
size_t req_rdma_offset;
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
unsigned long long t_start;
|
|
||||||
unsigned long long t_send1;
|
|
||||||
unsigned long long t_send2;
|
|
||||||
unsigned long long t_scheduled;
|
|
||||||
unsigned long long t_pin[MCA_PML_OB1_NUM_TSTAMPS];
|
|
||||||
unsigned long long t_put[MCA_PML_OB1_NUM_TSTAMPS];
|
|
||||||
unsigned long long t_fin[MCA_PML_OB1_NUM_TSTAMPS];
|
|
||||||
int t_pin_index;
|
|
||||||
int t_put_index;
|
|
||||||
int t_fin_index;
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t;
|
typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t;
|
||||||
|
|
||||||
@ -113,47 +100,6 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
|
|||||||
* Diagnostic output to trace rdma protocol timing
|
* Diagnostic output to trace rdma protocol timing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if MCA_PML_OB1_TIMESTAMPS
|
|
||||||
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq) \
|
|
||||||
{ \
|
|
||||||
int i; \
|
|
||||||
opal_output(0, "[%d,%d,%d] src start, %llu\n", \
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_start); \
|
|
||||||
\
|
|
||||||
opal_output(0, "[%d,%d,%d] src send start, %llu\n", \
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_send1); \
|
|
||||||
\
|
|
||||||
opal_output(0, "[%d,%d,%d] src scheduled, %llu\n", \
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_scheduled); \
|
|
||||||
\
|
|
||||||
opal_output(0, "[%d,%d,%d] src send complete, %llu\n", \
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_send2); \
|
|
||||||
\
|
|
||||||
for(i=0; i<(sendreq)->t_pin_index; i++) \
|
|
||||||
opal_output(0, "[%d,%d,%d] src pin, %llu %llu\n", \
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_pin[i], \
|
|
||||||
(sendreq)->t_put[i] - (sendreq)->t_pin[i]); \
|
|
||||||
for(i=0; i<(sendreq)->t_put_index; i++) \
|
|
||||||
opal_output(0, "[%d,%d,%d] src put, %llu %llu\n", \
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_put[i], \
|
|
||||||
(sendreq)->t_fin[i] - (sendreq)->t_put[i]); \
|
|
||||||
for(i=0; i<(sendreq)->t_fin_index; i++) \
|
|
||||||
opal_output(0, "[%d,%d,%d] src fin, %llu\n", \
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_fin[i]); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq) \
|
|
||||||
{ \
|
|
||||||
sendreq->t_pin_index = 0; \
|
|
||||||
sendreq->t_put_index = 0; \
|
|
||||||
sendreq->t_fin_index = 0; \
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq)
|
|
||||||
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start a send request.
|
* Start a send request.
|
||||||
@ -170,7 +116,6 @@ do {
|
|||||||
break; \
|
break; \
|
||||||
} \
|
} \
|
||||||
\
|
\
|
||||||
MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq); \
|
|
||||||
sendreq->req_lock = 0; \
|
sendreq->req_lock = 0; \
|
||||||
sendreq->req_pipeline_depth = 0; \
|
sendreq->req_pipeline_depth = 0; \
|
||||||
sendreq->req_bytes_delivered = 0; \
|
sendreq->req_bytes_delivered = 0; \
|
||||||
@ -205,12 +150,11 @@ do {
|
|||||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; \
|
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; \
|
||||||
hdr->hdr_common.hdr_flags = 0; \
|
hdr->hdr_common.hdr_flags = 0; \
|
||||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; \
|
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; \
|
||||||
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; \
|
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; \
|
||||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \
|
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \
|
||||||
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; \
|
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; \
|
||||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \
|
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \
|
||||||
hdr->hdr_match.hdr_msg_length = 0; \
|
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \
|
||||||
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; \
|
|
||||||
\
|
\
|
||||||
/* short message */ \
|
/* short message */ \
|
||||||
descriptor->des_cbfunc = mca_pml_ob1_match_completion; \
|
descriptor->des_cbfunc = mca_pml_ob1_match_completion; \
|
||||||
@ -257,7 +201,6 @@ do {
|
|||||||
(sendreq)->req_send.req_base.req_ompi.req_status._count = \
|
(sendreq)->req_send.req_base.req_ompi.req_status._count = \
|
||||||
(sendreq)->req_send.req_bytes_packed; \
|
(sendreq)->req_send.req_bytes_packed; \
|
||||||
(sendreq)->req_send.req_base.req_ompi.req_complete = true; \
|
(sendreq)->req_send.req_base.req_ompi.req_complete = true; \
|
||||||
MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq); \
|
|
||||||
if(ompi_request_waiting) { \
|
if(ompi_request_waiting) { \
|
||||||
opal_condition_broadcast(&ompi_request_cond); \
|
opal_condition_broadcast(&ompi_request_cond); \
|
||||||
} \
|
} \
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user