1
1

- added get based protocol (if supported by btl) for pre-registered memory

- removed 8 bytes from the majority of the pml headers 

This commit was SVN r6916.
Этот коммит содержится в:
Tim Woodall 2005-08-17 18:23:38 +00:00
родитель e8c103ac1f
Коммит f274f524ab
22 изменённых файлов: 520 добавлений и 335 удалений

Просмотреть файл

@ -247,6 +247,15 @@ static inline int mca_bml_base_put(mca_bml_base_btl_t* bml_btl, mca_btl_base_des
des); des);
} }
static inline int mca_bml_base_get(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des) {
des->des_context = (void*) bml_btl;
return bml_btl->btl_get(
bml_btl->btl,
bml_btl->btl_endpoint,
des);
}
static inline void mca_bml_base_prepare_src(mca_bml_base_btl_t* bml_btl, static inline void mca_bml_base_prepare_src(mca_bml_base_btl_t* bml_btl,
mca_mpool_base_registration_t* reg, mca_mpool_base_registration_t* reg,
struct ompi_convertor_t* conv, struct ompi_convertor_t* conv,

Просмотреть файл

@ -353,7 +353,7 @@ int mca_bml_r2_add_procs(
} }
/* check flags - is rdma prefered */ /* check flags - is rdma prefered */
if(btl->btl_flags & MCA_BTL_FLAGS_RDMA && if(btl->btl_flags & (MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_GET) &&
proc->proc_arch == ompi_proc_local_proc->proc_arch) { proc->proc_arch == ompi_proc_local_proc->proc_arch) {
mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma); mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
*bml_btl_rdma = *bml_btl; *bml_btl_rdma = *bml_btl;

Просмотреть файл

@ -134,7 +134,8 @@ typedef uint8_t mca_btl_base_tag_t;
/* prefered protocol */ /* prefered protocol */
#define MCA_BTL_FLAGS_SEND 0x1 #define MCA_BTL_FLAGS_SEND 0x1
#define MCA_BTL_FLAGS_RDMA 0x2 #define MCA_BTL_FLAGS_PUT 0x2
#define MCA_BTL_FLAGS_GET 0x4
/* btl can send directly from user buffer w/out registration */ /* btl can send directly from user buffer w/out registration */
#define MCA_BTL_FLAGS_SEND_INPLACE 0x10000000 #define MCA_BTL_FLAGS_SEND_INPLACE 0x10000000

Просмотреть файл

@ -151,7 +151,7 @@ int mca_btl_gm_component_open(void)
mca_btl_gm_param_register_int("max_rdma_size", 128*1024); mca_btl_gm_param_register_int("max_rdma_size", 128*1024);
#if OMPI_MCA_BTL_GM_SUPPORT_REGISTERING && OMPI_MCA_BTL_GM_HAVE_RDMA_PUT #if OMPI_MCA_BTL_GM_SUPPORT_REGISTERING && OMPI_MCA_BTL_GM_HAVE_RDMA_PUT
mca_btl_gm_module.super.btl_flags = mca_btl_gm_module.super.btl_flags =
mca_btl_gm_param_register_int("flags", MCA_BTL_FLAGS_RDMA); mca_btl_gm_param_register_int("flags", MCA_BTL_FLAGS_PUT);
#else #else
mca_btl_gm_module.super.btl_flags = MCA_BTL_FLAGS_SEND; mca_btl_gm_module.super.btl_flags = MCA_BTL_FLAGS_SEND;
#endif #endif

Просмотреть файл

@ -55,7 +55,7 @@ mca_btl_mvapi_module_t mca_btl_mvapi_module = {
mca_btl_mvapi_prepare_dst, mca_btl_mvapi_prepare_dst,
mca_btl_mvapi_send, mca_btl_mvapi_send,
mca_btl_mvapi_put, mca_btl_mvapi_put,
NULL /* get */ mca_btl_mvapi_get
} }
}; };
@ -801,6 +801,41 @@ int mca_btl_mvapi_put( mca_btl_base_module_t* btl,
} }
/*
* RDMA read remote buffer to local buffer address.
*/
int mca_btl_mvapi_get( mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* endpoint,
mca_btl_base_descriptor_t* descriptor)
{
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
frag->endpoint = endpoint;
frag->sr_desc.opcode = VAPI_RDMA_READ;
frag->sr_desc.remote_qp = endpoint->rem_qp_num_low;
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
frag->sr_desc.r_key = frag->base.des_src->seg_key.key32[0];
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval;
frag->sg_entry.len = frag->base.des_dst->seg_len;
frag->ret = VAPI_post_sr(mvapi_btl->nic,
endpoint->lcl_qp_hndl_low,
&frag->sr_desc);
if(VAPI_OK != frag->ret){
return OMPI_ERROR;
}
if(mca_btl_mvapi_component.use_srq) {
MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, 1);
MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 1);
} else {
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 1);
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 1);
}
return OMPI_SUCCESS;
}
/* /*

Просмотреть файл

@ -395,6 +395,24 @@ extern int mca_btl_mvapi_put(
struct mca_btl_base_descriptor_t* decriptor struct mca_btl_base_descriptor_t* decriptor
); );
/**
* PML->BTL Initiate a get of the specified size.
*
* @param btl (IN) BTL instance
* @param btl_base_peer (IN) BTL peer addressing
* @param send_request (IN/OUT) Send request (allocated by PML via mca_btl_base_request_alloc_fn_t)
* @param size (IN) Number of bytes PML is requesting BTL to deliver
* @param flags (IN) Flags that should be passed to the peer via the message header.
* @param request (OUT) OMPI_SUCCESS if the BTL was able to queue one or more fragments
*/
extern int mca_btl_mvapi_get(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor
);
/** /**
* Allocate a descriptor. * Allocate a descriptor.
* *

Просмотреть файл

@ -201,7 +201,7 @@ int mca_btl_mvapi_component_open(void)
1024*1024); 1024*1024);
mca_btl_mvapi_module.super.btl_flags = mca_btl_mvapi_module.super.btl_flags =
mca_btl_mvapi_param_register_int("flags", mca_btl_mvapi_param_register_int("flags",
MCA_BTL_FLAGS_RDMA); MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_GET);
param = mca_base_param_find("mpi", NULL, "leave_pinned"); param = mca_base_param_find("mpi", NULL, "leave_pinned");
@ -574,6 +574,7 @@ int mca_btl_mvapi_component_progress()
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!")); BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
return OMPI_ERROR; return OMPI_ERROR;
case VAPI_CQE_SQ_RDMA_READ:
case VAPI_CQE_SQ_RDMA_WRITE: case VAPI_CQE_SQ_RDMA_WRITE:
case VAPI_CQE_SQ_SEND_DATA : case VAPI_CQE_SQ_SEND_DATA :
@ -632,6 +633,7 @@ int mca_btl_mvapi_component_progress()
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!")); BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
return OMPI_ERROR; return OMPI_ERROR;
case VAPI_CQE_SQ_RDMA_READ:
case VAPI_CQE_SQ_RDMA_WRITE: case VAPI_CQE_SQ_RDMA_WRITE:
case VAPI_CQE_SQ_SEND_DATA : case VAPI_CQE_SQ_SEND_DATA :

Просмотреть файл

@ -126,7 +126,7 @@ int mca_btl_mx_component_open(void)
mca_btl_mx_module.super.btl_max_rdma_size = mca_btl_mx_module.super.btl_max_rdma_size =
mca_btl_mx_param_register_int("max_rdma_size", 1024*1024); mca_btl_mx_param_register_int("max_rdma_size", 1024*1024);
mca_btl_mx_module.super.btl_flags = mca_btl_mx_module.super.btl_flags =
mca_btl_mx_param_register_int("flags", MCA_BTL_FLAGS_RDMA); mca_btl_mx_param_register_int("flags", MCA_BTL_FLAGS_PUT);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -204,7 +204,7 @@ int mca_btl_openib_component_open(void)
1024*1024); 1024*1024);
mca_btl_openib_module.super.btl_flags = mca_btl_openib_module.super.btl_flags =
mca_btl_openib_param_register_int("flags", mca_btl_openib_param_register_int("flags",
MCA_BTL_FLAGS_RDMA); MCA_BTL_FLAGS_PUT);
param = mca_base_param_find("mpi", NULL, "leave_pinned"); param = mca_base_param_find("mpi", NULL, "leave_pinned");

Просмотреть файл

@ -202,7 +202,7 @@ mca_btl_portals_component_open(void)
&dummy); &dummy);
mca_btl_portals_module.super.btl_bandwidth = dummy; mca_btl_portals_module.super.btl_bandwidth = dummy;
mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE; mca_btl_portals_module.super.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_portals_module.portals_num_procs = 0; mca_btl_portals_module.portals_num_procs = 0;
bzero(&(mca_btl_portals_module.portals_reg), bzero(&(mca_btl_portals_module.portals_reg),

Просмотреть файл

@ -119,7 +119,7 @@ int mca_btl_self_component_open(void)
mca_btl_self.btl_exclusivity = mca_btl_self.btl_exclusivity =
mca_btl_self_param_register_int("exclusivity", 64*1024); mca_btl_self_param_register_int("exclusivity", 64*1024);
mca_btl_self.btl_flags = mca_btl_self.btl_flags =
mca_btl_self_param_register_int("flags", MCA_BTL_FLAGS_RDMA); mca_btl_self_param_register_int("flags", MCA_BTL_FLAGS_PUT);
/* initialize objects */ /* initialize objects */
OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t);

Просмотреть файл

@ -218,7 +218,7 @@ int mca_btl_tcp_component_open(void)
mca_btl_tcp_module.super.btl_max_rdma_size = mca_btl_tcp_module.super.btl_max_rdma_size =
mca_btl_tcp_param_register_int("max_rdma_size", INT_MAX); mca_btl_tcp_param_register_int("max_rdma_size", INT_MAX);
mca_btl_tcp_module.super.btl_flags = mca_btl_tcp_module.super.btl_flags =
mca_btl_tcp_param_register_int("flags", MCA_BTL_FLAGS_RDMA|MCA_BTL_FLAGS_SEND_INPLACE); mca_btl_tcp_param_register_int("flags", MCA_BTL_FLAGS_PUT|MCA_BTL_FLAGS_SEND_INPLACE);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -128,7 +128,7 @@ int mca_btl_template_component_open(void)
mca_btl_template_module.super.btl_max_rdma_size = mca_btl_template_module.super.btl_max_rdma_size =
mca_btl_template_param_register_int("max_rdma_size", 1024*1024); mca_btl_template_param_register_int("max_rdma_size", 1024*1024);
mca_btl_template_module.super.btl_flags = mca_btl_template_module.super.btl_flags =
mca_btl_template_param_register_int("flags", MCA_BTL_FLAGS_RDMA); mca_btl_template_param_register_int("flags", MCA_BTL_FLAGS_PUT);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -82,7 +82,7 @@ int mca_mpool_mvapi_register(mca_mpool_base_module_t* mpool,
vapi_reg->hndl = VAPI_INVAL_HNDL; vapi_reg->hndl = VAPI_INVAL_HNDL;
mr_in.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE; mr_in.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE | VAPI_EN_REMOTE_READ;
mr_in.l_key = 0; mr_in.l_key = 0;
mr_in.r_key = 0; mr_in.r_key = 0;
mr_in.pd_hndl = mpool_module->hca_pd.pd_tag; mr_in.pd_hndl = mpool_module->hca_pd.pd_tag;

Просмотреть файл

@ -20,7 +20,6 @@
#define MCA_PML_OB1_HEADER_H #define MCA_PML_OB1_HEADER_H
#include "ompi_config.h" #include "ompi_config.h"
#include "mca/ptl/ptl.h"
#ifdef HAVE_SYS_TYPES_H #ifdef HAVE_SYS_TYPES_H
#include <sys/types.h> #include <sys/types.h>
#endif #endif
@ -30,13 +29,14 @@
#define MCA_PML_OB1_HDR_TYPE_MATCH 1 #define MCA_PML_OB1_HDR_TYPE_MATCH 1
#define MCA_PML_OB1_HDR_TYPE_RNDV 2 #define MCA_PML_OB1_HDR_TYPE_RNDV 2
#define MCA_PML_OB1_HDR_TYPE_ACK 3 #define MCA_PML_OB1_HDR_TYPE_RGET 3
#define MCA_PML_OB1_HDR_TYPE_NACK 4 #define MCA_PML_OB1_HDR_TYPE_ACK 4
#define MCA_PML_OB1_HDR_TYPE_FRAG 5 #define MCA_PML_OB1_HDR_TYPE_NACK 5
#define MCA_PML_OB1_HDR_TYPE_GET 6 #define MCA_PML_OB1_HDR_TYPE_FRAG 6
#define MCA_PML_OB1_HDR_TYPE_PUT 7 #define MCA_PML_OB1_HDR_TYPE_GET 7
#define MCA_PML_OB1_HDR_TYPE_FIN 8 #define MCA_PML_OB1_HDR_TYPE_PUT 8
#define MCA_PML_OB1_HDR_TYPE_MAX 9 #define MCA_PML_OB1_HDR_TYPE_FIN 9
#define MCA_PML_OB1_HDR_TYPE_MAX 10
#define MCA_PML_OB1_HDR_FLAGS_ACK 1 /* is an ack required */ #define MCA_PML_OB1_HDR_FLAGS_ACK 1 /* is an ack required */
#define MCA_PML_OB1_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */ #define MCA_PML_OB1_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
@ -93,7 +93,6 @@ struct mca_pml_ob1_common_hdr_t {
typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t; typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t;
#define MCA_PML_OB1_COMMON_HDR_NTOH(h) #define MCA_PML_OB1_COMMON_HDR_NTOH(h)
#define MCA_PML_OB1_COMMON_HDR_HTON(h) #define MCA_PML_OB1_COMMON_HDR_HTON(h)
/** /**
@ -102,35 +101,32 @@ typedef struct mca_pml_ob1_common_hdr_t mca_pml_ob1_common_hdr_t;
*/ */
struct mca_pml_ob1_match_hdr_t { struct mca_pml_ob1_match_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */ mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
uint16_t hdr_contextid; /**< communicator index */ uint16_t hdr_ctx; /**< communicator index */
int32_t hdr_src; /**< source rank */ int32_t hdr_src; /**< source rank */
int32_t hdr_dst; /**< destination rank */ int32_t hdr_dst; /**< destination rank */
int32_t hdr_tag; /**< user tag */ int32_t hdr_tag; /**< user tag */
uint64_t hdr_msg_length; /**< message length */ uint16_t hdr_seq; /**< message sequence number */
uint16_t hdr_msg_seq; /**< message sequence number */
}; };
typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t; typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
#define MCA_PML_OB1_MATCH_HDR_NTOH(h) \ #define MCA_PML_OB1_MATCH_HDR_NTOH(h) \
do { \ do { \
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \ MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
(h).hdr_contextid = ntohs((h).hdr_contextid); \ (h).hdr_ctx = ntohs((h).hdr_ctx); \
(h).hdr_src = ntohl((h).hdr_src); \ (h).hdr_src = ntohl((h).hdr_src); \
(h).hdr_dst = ntohl((h).hdr_dst); \ (h).hdr_dst = ntohl((h).hdr_dst); \
(h).hdr_tag = ntohl((h).hdr_tag); \ (h).hdr_tag = ntohl((h).hdr_tag); \
(h).hdr_msg_length = ntoh64((h).hdr_msg_length); \ (h).hdr_seq = ntohs((h).hdr_seq); \
(h).hdr_msg_seq = ntohs((h).hdr_msg_seq); \
} while (0) } while (0)
#define MCA_PML_OB1_MATCH_HDR_HTON(h) \ #define MCA_PML_OB1_MATCH_HDR_HTON(h) \
do { \ do { \
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \ MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
(h).hdr_contextid = htons((h).hdr_contextid); \ (h).hdr_ctx = htons((h).hdr_ctx); \
(h).hdr_src = htonl((h).hdr_src); \ (h).hdr_src = htonl((h).hdr_src); \
(h).hdr_dst = htonl((h).hdr_dst); \ (h).hdr_dst = htonl((h).hdr_dst); \
(h).hdr_tag = htonl((h).hdr_tag); \ (h).hdr_tag = htonl((h).hdr_tag); \
(h).hdr_msg_length = hton64((h).hdr_msg_length); \ (h).hdr_seq = htons((h).hdr_seq); \
(h).hdr_msg_seq = htons((h).hdr_msg_seq); \
} while (0) } while (0)
/** /**
@ -140,7 +136,7 @@ typedef struct mca_pml_ob1_match_hdr_t mca_pml_ob1_match_hdr_t;
*/ */
struct mca_pml_ob1_rendezvous_hdr_t { struct mca_pml_ob1_rendezvous_hdr_t {
mca_pml_ob1_match_hdr_t hdr_match; mca_pml_ob1_match_hdr_t hdr_match;
uint64_t hdr_frag_length; /**< fragment length */ uint64_t hdr_msg_length; /**< message length */
ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */ ompi_ptr_t hdr_src_req; /**< pointer to source request - returned in ack */
}; };
typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t; typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
@ -148,21 +144,31 @@ typedef struct mca_pml_ob1_rendezvous_hdr_t mca_pml_ob1_rendezvous_hdr_t;
#define MCA_PML_OB1_RNDV_HDR_NTOH(h) \ #define MCA_PML_OB1_RNDV_HDR_NTOH(h) \
do { \ do { \
MCA_PML_OB1_MATCH_HDR_NTOH((h).hdr_match); \ MCA_PML_OB1_MATCH_HDR_NTOH((h).hdr_match); \
(h).hdr_frag_length = ntoh64((h).hdr_frag_length); \ (h).hdr_msg_length = ntoh64((h).hdr_msg_length); \
} while (0) } while (0)
#define MCA_PML_OB1_RNDV_HDR_HTON(h) \ #define MCA_PML_OB1_RNDV_HDR_HTON(h) \
do { \ do { \
MCA_PML_OB1_MATCH_HDR_HTON((h).hdr_match); \ MCA_PML_OB1_MATCH_HDR_HTON((h).hdr_match); \
(h).hdr_frag_length = hton64((h).hdr_frag_length); \ (h).hdr_msg_length = hton64((h).hdr_msg_length); \
} while (0) } while (0)
/**
* Header definition for a combined rdma rendezvous/get
*/
struct mca_pml_ob1_rget_hdr_t {
mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
ompi_ptr_t hdr_des; /**< source descriptor */
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
};
typedef struct mca_pml_ob1_rget_hdr_t mca_pml_ob1_rget_hdr_t;
/** /**
* Header for subsequent fragments. * Header for subsequent fragments.
*/ */
struct mca_pml_ob1_frag_hdr_t { struct mca_pml_ob1_frag_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */ mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
uint64_t hdr_frag_length; /**< fragment length */
uint64_t hdr_frag_offset; /**< offset into message */ uint64_t hdr_frag_offset; /**< offset into message */
ompi_ptr_t hdr_src_req; /**< pointer to source request */ ompi_ptr_t hdr_src_req; /**< pointer to source request */
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */ ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
@ -172,14 +178,12 @@ typedef struct mca_pml_ob1_frag_hdr_t mca_pml_ob1_frag_hdr_t;
#define MCA_PML_OB1_FRAG_HDR_NTOH(h) \ #define MCA_PML_OB1_FRAG_HDR_NTOH(h) \
do { \ do { \
MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \ MCA_PML_OB1_COMMON_HDR_NTOH((h).hdr_common); \
(h).hdr_frag_length = ntoh64((h).hdr_frag_length); \
(h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \ (h).hdr_frag_offset = ntoh64((h).hdr_frag_offset); \
} while (0) } while (0)
#define MCA_PML_OB1_FRAG_HDR_HTON(h) \ #define MCA_PML_OB1_FRAG_HDR_HTON(h) \
do { \ do { \
MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \ MCA_PML_OB1_COMMON_HDR_HTON((h).hdr_common); \
(h).hdr_frag_length = hton64((h).hdr_frag_length); \
(h).hdr_frag_offset = hton64((h).hdr_frag_offset); \ (h).hdr_frag_offset = hton64((h).hdr_frag_offset); \
} while (0) } while (0)
@ -214,8 +218,8 @@ typedef struct mca_pml_ob1_ack_hdr_t mca_pml_ob1_ack_hdr_t;
struct mca_pml_ob1_rdma_hdr_t { struct mca_pml_ob1_rdma_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */ mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
ompi_ptr_t hdr_src; /**< source request/descriptor */ ompi_ptr_t hdr_req; /**< destination request */
ompi_ptr_t hdr_dst; /**< receive request/descriptor */ ompi_ptr_t hdr_des; /**< source descriptor */
uint64_t hdr_rdma_offset; /**< current offset into user buffer */ uint64_t hdr_rdma_offset; /**< current offset into user buffer */
uint32_t hdr_seg_cnt; /**< number of segments for rdma */ uint32_t hdr_seg_cnt; /**< number of segments for rdma */
mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */ mca_btl_base_segment_t hdr_segs[1]; /**< list of segments for rdma */
@ -228,10 +232,7 @@ typedef struct mca_pml_ob1_rdma_hdr_t mca_pml_ob1_rdma_hdr_t;
struct mca_pml_ob1_fin_hdr_t { struct mca_pml_ob1_fin_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */ mca_pml_ob1_common_hdr_t hdr_common; /**< common attributes */
ompi_ptr_t hdr_src; /**< source request/descriptor */ ompi_ptr_t hdr_des; /**< completed descriptor */
ompi_ptr_t hdr_dst; /**< receive request/descriptor */
uint64_t hdr_rdma_offset; /**< data offset */
uint64_t hdr_rdma_length; /**< number of segments for rdma */
}; };
typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t; typedef struct mca_pml_ob1_fin_hdr_t mca_pml_ob1_fin_hdr_t;
@ -242,6 +243,7 @@ union mca_pml_ob1_hdr_t {
mca_pml_ob1_common_hdr_t hdr_common; mca_pml_ob1_common_hdr_t hdr_common;
mca_pml_ob1_match_hdr_t hdr_match; mca_pml_ob1_match_hdr_t hdr_match;
mca_pml_ob1_rendezvous_hdr_t hdr_rndv; mca_pml_ob1_rendezvous_hdr_t hdr_rndv;
mca_pml_ob1_rget_hdr_t hdr_rget;
mca_pml_ob1_frag_hdr_t hdr_frag; mca_pml_ob1_frag_hdr_t hdr_frag;
mca_pml_ob1_ack_hdr_t hdr_ack; mca_pml_ob1_ack_hdr_t hdr_ack;
mca_pml_ob1_rdma_hdr_t hdr_rdma; mca_pml_ob1_rdma_hdr_t hdr_rdma;

Просмотреть файл

@ -27,6 +27,7 @@ typedef enum {
MCA_PML_OB1_RMDA_INIT, MCA_PML_OB1_RMDA_INIT,
MCA_PML_OB1_RDMA_PREPARE, MCA_PML_OB1_RDMA_PREPARE,
MCA_PML_OB1_RDMA_PUT, MCA_PML_OB1_RDMA_PUT,
MCA_PML_OB1_RDMA_GET,
MCA_PML_OB1_RDMA_FIN MCA_PML_OB1_RDMA_FIN
} mca_pml_ob1_rdma_state_t; } mca_pml_ob1_rdma_state_t;
@ -37,7 +38,7 @@ struct mca_pml_ob1_rdma_frag_t {
mca_pml_ob1_rdma_state_t rdma_state; mca_pml_ob1_rdma_state_t rdma_state;
size_t rdma_length; size_t rdma_length;
mca_btl_base_segment_t rdma_segs[MCA_BTL_DES_MAX_SEGMENTS]; mca_btl_base_segment_t rdma_segs[MCA_BTL_DES_MAX_SEGMENTS];
struct mca_pml_ob1_send_request_t* rdma_req; void *rdma_req;
struct mca_bml_base_endpoint_t* rdma_ep; struct mca_bml_base_endpoint_t* rdma_ep;
}; };
typedef struct mca_pml_ob1_rdma_frag_t mca_pml_ob1_rdma_frag_t; typedef struct mca_pml_ob1_rdma_frag_t mca_pml_ob1_rdma_frag_t;

Просмотреть файл

@ -69,6 +69,7 @@ void mca_pml_ob1_recv_frag_callback(
switch(hdr->hdr_common.hdr_type) { switch(hdr->hdr_common.hdr_type) {
case MCA_PML_OB1_HDR_TYPE_MATCH: case MCA_PML_OB1_HDR_TYPE_MATCH:
case MCA_PML_OB1_HDR_TYPE_RNDV: case MCA_PML_OB1_HDR_TYPE_RNDV:
case MCA_PML_OB1_HDR_TYPE_RGET:
{ {
mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,des->des_dst_cnt); mca_pml_ob1_recv_frag_match(btl, &hdr->hdr_match, segments,des->des_dst_cnt);
break; break;
@ -79,9 +80,6 @@ void mca_pml_ob1_recv_frag_callback(
hdr->hdr_ack.hdr_src_req.pval; hdr->hdr_ack.hdr_src_req.pval;
sendreq->req_recv = hdr->hdr_ack.hdr_dst_req; sendreq->req_recv = hdr->hdr_ack.hdr_dst_req;
sendreq->req_rdma_offset = hdr->hdr_ack.hdr_rdma_offset; sendreq->req_rdma_offset = hdr->hdr_ack.hdr_rdma_offset;
#if MCA_PML_OB1_TIMESTAMPS
sendreq->t_send1 = get_profiler_timestamp();
#endif
MCA_PML_OB1_SEND_REQUEST_ADVANCE(sendreq); MCA_PML_OB1_SEND_REQUEST_ADVANCE(sendreq);
break; break;
} }
@ -89,32 +87,21 @@ void mca_pml_ob1_recv_frag_callback(
{ {
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*) mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)
hdr->hdr_frag.hdr_dst_req.pval; hdr->hdr_frag.hdr_dst_req.pval;
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt); mca_pml_ob1_recv_request_progress(recvreq,btl,segments,des->des_dst_cnt);
break; break;
} }
case MCA_PML_OB1_HDR_TYPE_PUT: case MCA_PML_OB1_HDR_TYPE_PUT:
{ {
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*) mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)
hdr->hdr_rdma.hdr_src.pval; hdr->hdr_rdma.hdr_req.pval;
mca_pml_ob1_send_request_put(sendreq,btl,&hdr->hdr_rdma); mca_pml_ob1_send_request_put(sendreq,btl,&hdr->hdr_rdma);
break; break;
} }
case MCA_PML_OB1_HDR_TYPE_FIN: case MCA_PML_OB1_HDR_TYPE_FIN:
{ {
mca_btl_base_descriptor_t* dst = (mca_btl_base_descriptor_t*) mca_btl_base_descriptor_t* rdma = (mca_btl_base_descriptor_t*)
hdr->hdr_fin.hdr_dst.pval; hdr->hdr_fin.hdr_des.pval;
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)dst->des_cbdata; rdma->des_cbfunc(btl, NULL, rdma, OMPI_SUCCESS);
#if MCA_PML_OB1_TIMESTAMPS
recvreq->fin1[recvreq->fin_index] = get_profiler_timestamp();
btl->btl_free(btl,dst);
recvreq->fin2[recvreq->fin_index] = get_profiler_timestamp();
recvreq->fin_index++;
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt);
#else
mca_pml_ob1_recv_request_progress(recvreq,segments,des->des_dst_cnt);
btl->btl_free(btl,dst);
#endif
break; break;
} }
default: default:
@ -426,11 +413,11 @@ int mca_pml_ob1_recv_frag_match(
int rc; int rc;
/* communicator pointer */ /* communicator pointer */
comm_ptr=ompi_comm_lookup(hdr->hdr_contextid); comm_ptr=ompi_comm_lookup(hdr->hdr_ctx);
comm=(mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm; comm=(mca_pml_ob1_comm_t *)comm_ptr->c_pml_comm;
/* source sequence number */ /* source sequence number */
frag_msg_seq = hdr->hdr_msg_seq; frag_msg_seq = hdr->hdr_seq;
proc = comm->procs + hdr->hdr_src; proc = comm->procs + hdr->hdr_src;
/* get next expected message sequence number - if threaded /* get next expected message sequence number - if threaded
@ -497,7 +484,7 @@ int mca_pml_ob1_recv_frag_match(
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&comm->matching_lock);
return rc; return rc;
} }
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments); MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag ); opal_list_append( &proc->unexpected_frags, (opal_list_item_t *)frag );
} }
@ -522,7 +509,7 @@ int mca_pml_ob1_recv_frag_match(
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&comm->matching_lock);
return rc; return rc;
} }
MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments); MCA_PML_OB1_RECV_FRAG_INIT(frag,hdr,segments,num_segments,btl);
opal_list_append(&proc->frags_cant_match, (opal_list_item_t *)frag); opal_list_append(&proc->frags_cant_match, (opal_list_item_t *)frag);
} }
@ -531,15 +518,13 @@ int mca_pml_ob1_recv_frag_match(
/* release matching lock before processing fragment */ /* release matching lock before processing fragment */
if(match != NULL) { if(match != NULL) {
MCA_PML_OB1_RECV_REQUEST_MATCHED(match, hdr); mca_pml_ob1_recv_request_progress(match,btl,segments,num_segments);
mca_pml_ob1_recv_request_progress(match,segments,num_segments);
} }
if(additional_match) { if(additional_match) {
opal_list_item_t* item; opal_list_item_t* item;
while(NULL != (item = opal_list_remove_first(&additional_matches))) { while(NULL != (item = opal_list_remove_first(&additional_matches))) {
mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item; mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
MCA_PML_OB1_RECV_REQUEST_MATCHED(frag->request, hdr); mca_pml_ob1_recv_request_progress(frag->request,frag->btl,frag->segments,frag->num_segments);
mca_pml_ob1_recv_request_progress(frag->request,frag->segments,frag->num_segments);
MCA_PML_OB1_RECV_FRAG_RETURN(frag); MCA_PML_OB1_RECV_FRAG_RETURN(frag);
} }
} }
@ -601,7 +586,7 @@ static bool mca_pml_ob1_check_cantmatch_for_match(
/* /*
* If the message has the next expected seq from that proc... * If the message has the next expected seq from that proc...
*/ */
frag_seq=frag->hdr.hdr_match.hdr_msg_seq; frag_seq=frag->hdr.hdr_match.hdr_seq;
if (frag_seq == next_msg_seq_expected) { if (frag_seq == next_msg_seq_expected) {
mca_pml_ob1_match_hdr_t* hdr = &frag->hdr.hdr_match; mca_pml_ob1_match_hdr_t* hdr = &frag->hdr.hdr_match;

Просмотреть файл

@ -38,6 +38,7 @@ struct mca_pml_ob1_recv_frag_t {
mca_pml_ob1_hdr_t hdr; mca_pml_ob1_hdr_t hdr;
struct mca_pml_ob1_recv_request_t* request; struct mca_pml_ob1_recv_request_t* request;
size_t num_segments; size_t num_segments;
mca_btl_base_module_t* btl;
mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS]; mca_btl_base_segment_t segments[MCA_BTL_DES_MAX_SEGMENTS];
mca_pml_ob1_buffer_t* buffers[MCA_BTL_DES_MAX_SEGMENTS]; mca_pml_ob1_buffer_t* buffers[MCA_BTL_DES_MAX_SEGMENTS];
}; };
@ -54,13 +55,14 @@ do { \
} while(0) } while(0)
#define MCA_PML_OB1_RECV_FRAG_INIT(frag, hdr,segs,cnt) \ #define MCA_PML_OB1_RECV_FRAG_INIT(frag, hdr,segs,cnt,btl) \
do { \ do { \
size_t i; \ size_t i; \
mca_btl_base_segment_t* macro_segments = frag->segments; \ mca_btl_base_segment_t* macro_segments = frag->segments; \
mca_pml_ob1_buffer_t** buffers = frag->buffers; \ mca_pml_ob1_buffer_t** buffers = frag->buffers; \
\ \
/* init recv_frag */ \ /* init recv_frag */ \
frag->btl = btl; \
frag->hdr = *(mca_pml_ob1_hdr_t*)hdr; \ frag->hdr = *(mca_pml_ob1_hdr_t*)hdr; \
frag->num_segments = cnt; \ frag->num_segments = cnt; \
/* copy over data */ \ /* copy over data */ \

Просмотреть файл

@ -24,7 +24,9 @@
#include "pml_ob1_recvreq.h" #include "pml_ob1_recvreq.h"
#include "pml_ob1_recvfrag.h" #include "pml_ob1_recvfrag.h"
#include "pml_ob1_sendreq.h" #include "pml_ob1_sendreq.h"
#include "pml_ob1_rdmafrag.h"
#include "mca/bml/base/base.h" #include "mca/bml/base/base.h"
#include "mca/errmgr/errmgr.h"
static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc( static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
mca_pml_ob1_recv_request_t* request, mca_pml_ob1_comm_proc_t* proc); mca_pml_ob1_recv_request_t* request, mca_pml_ob1_comm_proc_t* proc);
@ -102,7 +104,7 @@ OBJ_CLASS_INSTANCE(
* Release resources. * Release resources.
*/ */
static void mca_pml_ob1_send_ctl_complete( static void mca_pml_ob1_ctl_completion(
mca_btl_base_module_t* btl, mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep, struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des, struct mca_btl_base_descriptor_t* des,
@ -112,6 +114,21 @@ static void mca_pml_ob1_send_ctl_complete(
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des); MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
} }
/*
* Put operation has completed remotely - update request status
*/
static void mca_pml_ob1_put_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
{
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
mca_pml_ob1_recv_request_t* recvreq = (mca_pml_ob1_recv_request_t*)des->des_cbdata;
mca_pml_ob1_recv_request_progress(recvreq,btl,des->des_dst,des->des_dst_cnt);
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
}
/* /*
* *
@ -143,7 +160,7 @@ static void mca_pml_ob1_recv_request_ack(
* registered. if registered on both sides - do one rdma for * registered. if registered on both sides - do one rdma for
* the entire message. * the entire message.
*/ */
if(hdr->hdr_match.hdr_msg_length > 0) { if(hdr->hdr_msg_length > 0) {
recvreq->req_chunk = mca_mpool_base_find(recvreq->req_recv.req_base.req_addr); recvreq->req_chunk = mca_mpool_base_find(recvreq->req_recv.req_base.req_addr);
if( NULL != recvreq->req_chunk && if( NULL != recvreq->req_chunk &&
@ -170,8 +187,8 @@ static void mca_pml_ob1_recv_request_ack(
/* use convertor to figure out the rdma offset for this request */ /* use convertor to figure out the rdma offset for this request */
recvreq->req_rdma_offset = bml_endpoint->btl_rdma_offset; recvreq->req_rdma_offset = bml_endpoint->btl_rdma_offset;
if(recvreq->req_rdma_offset < hdr->hdr_frag_length) { if(recvreq->req_rdma_offset < recvreq->req_bytes_received) {
recvreq->req_rdma_offset = hdr->hdr_frag_length; recvreq->req_rdma_offset = recvreq->req_bytes_received;
} }
ompi_convertor_set_position( ompi_convertor_set_position(
&recvreq->req_recv.req_convertor, &recvreq->req_recv.req_convertor,
@ -182,7 +199,7 @@ static void mca_pml_ob1_recv_request_ack(
/* start rdma at the current fragment offset - no need to send an ack in this case */ /* start rdma at the current fragment offset - no need to send an ack in this case */
} else { } else {
recvreq->req_rdma_offset = hdr->hdr_frag_length; recvreq->req_rdma_offset = recvreq->req_bytes_received;
return; return;
} }
@ -207,8 +224,7 @@ static void mca_pml_ob1_recv_request_ack(
/* initialize descriptor */ /* initialize descriptor */
des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; des->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
des->des_cbfunc = mca_pml_ob1_send_ctl_complete; des->des_cbfunc = mca_pml_ob1_ctl_completion;
des->des_cbdata = bml_btl;
rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML); rc = mca_bml_base_send(bml_btl, des, MCA_BTL_TAG_PML);
if(rc != OMPI_SUCCESS) { if(rc != OMPI_SUCCESS) {
@ -227,6 +243,166 @@ retry:
} }
/**
* Return resources used by the RDMA
*/
static void mca_pml_ob1_fin_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
{
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
}
/*
*
*/
static void mca_pml_ob1_rget_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
{
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*)des->des_context;
mca_pml_ob1_rdma_frag_t* frag = (mca_pml_ob1_rdma_frag_t*)des->des_cbdata;
mca_pml_ob1_recv_request_t* recvreq = frag->rdma_req;
mca_pml_ob1_fin_hdr_t* hdr;
mca_btl_base_descriptor_t *fin;
int rc;
/* is receive request complete */
OPAL_THREAD_LOCK(&ompi_request_lock);
recvreq->req_bytes_received += frag->rdma_length;
recvreq->req_bytes_delivered += frag->rdma_length;
if(recvreq->req_bytes_received == recvreq->req_recv.req_bytes_packed) {
recvreq->req_recv.req_base.req_ompi.req_status._count = recvreq->req_bytes_delivered;
recvreq->req_recv.req_base.req_pml_complete = true;
recvreq->req_recv.req_base.req_ompi.req_complete = true;
if(ompi_request_waiting) {
opal_condition_broadcast(&ompi_request_cond);
}
}
OPAL_THREAD_UNLOCK(&ompi_request_lock);
/* return descriptor */
MCA_BML_BASE_BTL_DES_RETURN(bml_btl, des);
/* queue up a fin control message to source */
MCA_BML_BASE_BTL_DES_ALLOC(bml_btl, fin, sizeof(mca_pml_ob1_fin_hdr_t));
if(NULL == fin) {
opal_output(0, "[%s:%d] unable to allocate descriptor", __FILE__,__LINE__);
orte_errmgr.abort();
}
fin->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
fin->des_cbfunc = mca_pml_ob1_fin_completion;
fin->des_cbdata = frag;
/* fill in header */
hdr = (mca_pml_ob1_fin_hdr_t*)fin->des_src->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN;
hdr->hdr_des = frag->rdma_hdr.hdr_rget.hdr_des;
/* queue request */
rc = mca_bml_base_send(
bml_btl,
fin,
MCA_BTL_TAG_PML
);
if(OMPI_SUCCESS != rc) {
opal_output(0, "[%s:%d] unable to queue fin", __FILE__,__LINE__);
orte_errmgr.abort();
}
}
/*
*
*/
static void mca_pml_ob1_recv_request_rget(
mca_pml_ob1_recv_request_t* recvreq,
mca_btl_base_module_t* btl,
mca_pml_ob1_rget_hdr_t* hdr)
{
mca_bml_base_endpoint_t* bml_endpoint = NULL;
mca_bml_base_btl_t* bml_btl;
mca_pml_ob1_rdma_frag_t* frag;
mca_btl_base_descriptor_t* descriptor;
mca_mpool_base_registration_t* registration = NULL;
size_t i, size = 0;
int rc;
/* lookup bml datastructures */
bml_endpoint = (mca_bml_base_endpoint_t*) recvreq->req_proc->proc_pml;
bml_btl = mca_bml_base_btl_array_find(&bml_endpoint->btl_eager, btl);
if(NULL == bml_btl) {
opal_output(0, "[%s:%d] invalid bml for rdma get", __FILE__, __LINE__);
orte_errmgr.abort();
}
/* allocate/initialize a fragment */
MCA_PML_OB1_RDMA_FRAG_ALLOC(frag,rc);
for(i=0; i<hdr->hdr_seg_cnt; i++) {
size += hdr->hdr_segs[i].seg_len;
frag->rdma_segs[i] = hdr->hdr_segs[i];
}
frag->rdma_hdr.hdr_rget = *hdr;
frag->rdma_req = recvreq;
frag->rdma_ep = bml_endpoint;
frag->rdma_state = MCA_PML_OB1_RDMA_PREPARE;
/* is there an existing registration for this btl */
recvreq->req_chunk = mca_mpool_base_find(recvreq->req_recv.req_base.req_addr);
if( NULL != recvreq->req_chunk ) {
struct mca_mpool_base_reg_mpool_t *reg = recvreq->req_chunk->mpools;
while(reg->mpool != NULL) {
if(NULL != mca_bml_base_btl_array_find(&bml_endpoint->btl_rdma,
(mca_btl_base_module_t*) reg->user_data)) {
recvreq->req_mpool = reg;
registration = reg->mpool_registration;
break;
}
reg++;
}
}
/* prepare descriptor */
mca_bml_base_prepare_dst(
bml_btl,
registration,
&recvreq->req_recv.req_convertor,
0,
&size,
&descriptor);
if(NULL == descriptor) {
opal_output(0, "[%s:%d] unable to allocate descriptor for rdma get", __FILE__, __LINE__);
orte_errmgr.abort();
}
frag->rdma_length = size;
descriptor->des_src = frag->rdma_segs;
descriptor->des_src_cnt = hdr->hdr_seg_cnt;
descriptor->des_cbdata = frag;
descriptor->des_cbfunc = mca_pml_ob1_rget_completion;
/* queue up get request */
rc = mca_bml_base_get(bml_btl,descriptor);
if(rc != OMPI_SUCCESS) {
opal_output(0, "[%s:%d] rdma get failed with error %d", __FILE__, __LINE__, rc);
orte_errmgr.abort();
}
}
/* /*
* Update the recv request status to reflect the number of bytes * Update the recv request status to reflect the number of bytes
* received and actually delivered to the application. * received and actually delivered to the application.
@ -234,6 +410,7 @@ retry:
void mca_pml_ob1_recv_request_progress( void mca_pml_ob1_recv_request_progress(
mca_pml_ob1_recv_request_t* recvreq, mca_pml_ob1_recv_request_t* recvreq,
mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments, mca_btl_base_segment_t* segments,
size_t num_segments) size_t num_segments)
{ {
@ -242,11 +419,17 @@ void mca_pml_ob1_recv_request_progress(
size_t data_offset = 0; size_t data_offset = 0;
mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval; mca_pml_ob1_hdr_t* hdr = (mca_pml_ob1_hdr_t*)segments->seg_addr.pval;
bool schedule = false; bool schedule = false;
size_t i;
for(i=0; i<num_segments; i++)
bytes_received += segments[i].seg_len;
switch(hdr->hdr_common.hdr_type) { switch(hdr->hdr_common.hdr_type) {
case MCA_PML_OB1_HDR_TYPE_MATCH: case MCA_PML_OB1_HDR_TYPE_MATCH:
bytes_received = hdr->hdr_match.hdr_msg_length; bytes_received -= sizeof(mca_pml_ob1_match_hdr_t);
recvreq->req_recv.req_bytes_packed = bytes_received;
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq,&hdr->hdr_match);
MCA_PML_OB1_RECV_REQUEST_UNPACK( MCA_PML_OB1_RECV_REQUEST_UNPACK(
recvreq, recvreq,
segments, segments,
@ -259,12 +442,11 @@ void mca_pml_ob1_recv_request_progress(
case MCA_PML_OB1_HDR_TYPE_RNDV: case MCA_PML_OB1_HDR_TYPE_RNDV:
#if MCA_PML_OB1_TIMESTAMPS bytes_received -= sizeof(mca_pml_ob1_rendezvous_hdr_t);
recvreq->ack = get_profiler_timestamp(); recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
#endif
recvreq->req_send = hdr->hdr_rndv.hdr_src_req; recvreq->req_send = hdr->hdr_rndv.hdr_src_req;
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq,&hdr->hdr_match);
mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv); mca_pml_ob1_recv_request_ack(recvreq, &hdr->hdr_rndv);
bytes_received = hdr->hdr_rndv.hdr_frag_length;
MCA_PML_OB1_RECV_REQUEST_UNPACK( MCA_PML_OB1_RECV_REQUEST_UNPACK(
recvreq, recvreq,
segments, segments,
@ -275,9 +457,16 @@ void mca_pml_ob1_recv_request_progress(
bytes_delivered); bytes_delivered);
break; break;
case MCA_PML_OB1_HDR_TYPE_RGET:
recvreq->req_recv.req_bytes_packed = hdr->hdr_rndv.hdr_msg_length;
MCA_PML_OB1_RECV_REQUEST_MATCHED(recvreq,&hdr->hdr_match);
mca_pml_ob1_recv_request_rget(recvreq, btl, &hdr->hdr_rget);
return;
case MCA_PML_OB1_HDR_TYPE_FRAG: case MCA_PML_OB1_HDR_TYPE_FRAG:
bytes_received = hdr->hdr_frag.hdr_frag_length; bytes_received -= sizeof(mca_pml_ob1_frag_hdr_t);
data_offset = hdr->hdr_frag.hdr_frag_offset; data_offset = hdr->hdr_frag.hdr_frag_offset;
MCA_PML_OB1_RECV_REQUEST_UNPACK( MCA_PML_OB1_RECV_REQUEST_UNPACK(
recvreq, recvreq,
@ -291,7 +480,6 @@ void mca_pml_ob1_recv_request_progress(
case MCA_PML_OB1_HDR_TYPE_FIN: case MCA_PML_OB1_HDR_TYPE_FIN:
bytes_delivered = bytes_received = hdr->hdr_fin.hdr_rdma_length;
OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1); OPAL_THREAD_ADD_SIZE_T(&recvreq->req_pipeline_depth,-1);
break; break;
@ -310,21 +498,6 @@ void mca_pml_ob1_recv_request_progress(
recvreq->req_recv.req_base.req_pml_complete = true; recvreq->req_recv.req_base.req_pml_complete = true;
recvreq->req_recv.req_base.req_ompi.req_complete = true; recvreq->req_recv.req_base.req_ompi.req_complete = true;
#if MCA_PML_OB1_TIMESTAMPS
if(recvreq->req_bytes_received > 0) {
int i;
opal_output(0, "[%d,%d,%d] dst ack: %llu",
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->ack);
for(i=0; i<recvreq->pin_index; i++) {
opal_output(0, "[%d,%d,%d] dst pin, %llu %llu",
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->pin1[i], recvreq->pin2[i] - recvreq->pin1[i]);
}
for(i=0; i<recvreq->fin_index; i++) {
opal_output(0, "[%d,%d,%d] dst fin: %llu %llu",
ORTE_NAME_ARGS(orte_process_info.my_name), recvreq->fin1[i], recvreq->fin2[i] - recvreq->fin1[i]);
}
}
#endif
if(ompi_request_waiting) { if(ompi_request_waiting) {
opal_condition_broadcast(&ompi_request_cond); opal_condition_broadcast(&ompi_request_cond);
} }
@ -409,9 +582,6 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
/* prepare a descriptor for RDMA */ /* prepare a descriptor for RDMA */
ompi_convertor_set_position(&recvreq->req_recv.req_convertor, &recvreq->req_rdma_offset); ompi_convertor_set_position(&recvreq->req_recv.req_convertor, &recvreq->req_rdma_offset);
#if MCA_PML_OB1_TIMESTAMPS
recvreq->pin1[recvreq->pin_index] = get_profiler_timestamp();
#endif
mca_bml_base_prepare_dst( mca_bml_base_prepare_dst(
bml_btl, bml_btl,
reg, reg,
@ -419,16 +589,13 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
0, 0,
&size, &size,
&dst); &dst);
#if MCA_PML_OB1_TIMESTAMPS
recvreq->pin2[recvreq->pin_index] = get_profiler_timestamp();
recvreq->pin_index++;
#endif
if(dst == NULL) { if(dst == NULL) {
OPAL_THREAD_LOCK(&mca_pml_ob1.lock); OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
opal_list_append(&mca_pml_ob1.recv_pending, (opal_list_item_t*)recvreq); opal_list_append(&mca_pml_ob1.recv_pending, (opal_list_item_t*)recvreq);
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
break; break;
} }
dst->des_cbfunc = mca_pml_ob1_put_completion;
dst->des_cbdata = recvreq; dst->des_cbdata = recvreq;
/* prepare a descriptor for rdma control message */ /* prepare a descriptor for rdma control message */
@ -446,15 +613,14 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
break; break;
} }
ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; ctl->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
ctl->des_cbfunc = mca_pml_ob1_send_ctl_complete; ctl->des_cbfunc = mca_pml_ob1_ctl_completion;
ctl->des_cbdata = bml_btl;
/* fill in rdma header */ /* fill in rdma header */
hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval; hdr = (mca_pml_ob1_rdma_hdr_t*)ctl->des_src->seg_addr.pval;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_PUT;
hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_flags = 0;
hdr->hdr_src = recvreq->req_send; hdr->hdr_req = recvreq->req_send;
hdr->hdr_dst.pval = dst; hdr->hdr_des.pval = dst;
hdr->hdr_rdma_offset = recvreq->req_rdma_offset; hdr->hdr_rdma_offset = recvreq->req_rdma_offset;
hdr->hdr_seg_cnt = dst->des_dst_cnt; hdr->hdr_seg_cnt = dst->des_dst_cnt;
memcpy(hdr->hdr_segs, dst->des_dst, dst->des_dst_cnt * sizeof(mca_btl_base_segment_t)); memcpy(hdr->hdr_segs, dst->des_dst, dst->des_dst_cnt * sizeof(mca_btl_base_segment_t));
@ -506,7 +672,7 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
(frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) { (frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&comm->matching_lock);
mca_pml_ob1_recv_request_progress(request,frag->segments,frag->num_segments); mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
MCA_PML_OB1_RECV_FRAG_RETURN(frag); MCA_PML_OB1_RECV_FRAG_RETURN(frag);
@ -560,7 +726,7 @@ void mca_pml_ob1_recv_request_match_wild(mca_pml_ob1_recv_request_t* request)
if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) { if ((frag = mca_pml_ob1_recv_request_match_specific_proc(request, proc)) != NULL) {
OPAL_THREAD_UNLOCK(&comm->matching_lock); OPAL_THREAD_UNLOCK(&comm->matching_lock);
mca_pml_ob1_recv_request_progress(request,frag->segments,frag->num_segments); mca_pml_ob1_recv_request_progress(request,frag->btl,frag->segments,frag->num_segments);
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
MCA_PML_OB1_RECV_FRAG_RETURN(frag); MCA_PML_OB1_RECV_FRAG_RETURN(frag);
@ -620,7 +786,6 @@ static mca_pml_ob1_recv_frag_t* mca_pml_ob1_recv_request_match_specific_proc(
} }
return NULL; return NULL;
find_fragment: find_fragment:
MCA_PML_OB1_RECV_REQUEST_MATCHED(request, hdr);
if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) || if( !((MCA_PML_REQUEST_IPROBE == request->req_recv.req_base.req_type) ||
(MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) { (MCA_PML_REQUEST_PROBE == request->req_recv.req_base.req_type)) ) {
opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag); opal_list_remove_item(unexpected_frags, (opal_list_item_t*)frag);

Просмотреть файл

@ -28,12 +28,6 @@
extern "C" { extern "C" {
#endif #endif
struct mca_pml_ob1_registration_t {
struct mca_pml_ob1_endpoint_t* endpoint;
struct mca_mpool_base_registration_t* registration;
};
typedef struct mca_pml_ob1_registration_t mca_pml_ob1_registration_t;
struct mca_pml_ob1_recv_request_t { struct mca_pml_ob1_recv_request_t {
mca_pml_base_recv_request_t req_recv; mca_pml_base_recv_request_t req_recv;
@ -46,18 +40,6 @@ struct mca_pml_ob1_recv_request_t {
size_t req_bytes_received; size_t req_bytes_received;
size_t req_bytes_delivered; size_t req_bytes_delivered;
size_t req_rdma_offset; size_t req_rdma_offset;
mca_pml_ob1_registration_t req_reg[MCA_MPOOL_BASE_MAX_REG];
size_t req_num_reg;
#if MCA_PML_OB1_TIMESTAMPS
unsigned long long ack;
unsigned long long pin1[MCA_PML_OB1_NUM_TSTAMPS];
unsigned long long pin2[MCA_PML_OB1_NUM_TSTAMPS];
unsigned long long fin1[MCA_PML_OB1_NUM_TSTAMPS];
unsigned long long fin2[MCA_PML_OB1_NUM_TSTAMPS];
int pin_index;
int fin_index;
#endif
}; };
typedef struct mca_pml_ob1_recv_request_t mca_pml_ob1_recv_request_t; typedef struct mca_pml_ob1_recv_request_t mca_pml_ob1_recv_request_t;
@ -154,13 +136,6 @@ void mca_pml_ob1_recv_request_match_specific(mca_pml_ob1_recv_request_t* request
/** /**
* Initialize diagnostic code for tracing rdma protocol timing * Initialize diagnostic code for tracing rdma protocol timing
*/ */
#if MCA_PML_OB1_TIMESTAMPS
#define MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(recvreq) \
(request)->fin_index = 0; \
(request)->pin_index = 0;
#else
#define MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(recvreq)
#endif
/** /**
* Start an initialized request. * Start an initialized request.
@ -180,7 +155,6 @@ do {
(request)->req_recv.req_base.req_pml_complete = false; \ (request)->req_recv.req_base.req_pml_complete = false; \
(request)->req_recv.req_base.req_ompi.req_complete = false; \ (request)->req_recv.req_base.req_ompi.req_complete = false; \
(request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ (request)->req_recv.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
MCA_PML_OB1_RECV_REQUEST_TSTAMPS_INIT(request); \
\ \
/* always set the req_status.MPI_TAG to ANY_TAG before starting the \ /* always set the req_status.MPI_TAG to ANY_TAG before starting the \
* request. This field is used if cancelled to find out if the request \ * request. This field is used if cancelled to find out if the request \
@ -207,10 +181,8 @@ do {
request, \ request, \
hdr) \ hdr) \
do { \ do { \
(request)->req_recv.req_bytes_packed = (hdr)->hdr_msg_length; \
(request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag; \ (request)->req_recv.req_base.req_ompi.req_status.MPI_TAG = (hdr)->hdr_tag; \
(request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = (hdr)->hdr_src; \ (request)->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = (hdr)->hdr_src; \
\
if((request)->req_recv.req_bytes_packed != 0) { \ if((request)->req_recv.req_bytes_packed != 0) { \
ompi_proc_t *proc = \ ompi_proc_t *proc = \
ompi_comm_peer_lookup( \ ompi_comm_peer_lookup( \
@ -280,6 +252,7 @@ do {
void mca_pml_ob1_recv_request_progress( void mca_pml_ob1_recv_request_progress(
mca_pml_ob1_recv_request_t* req, mca_pml_ob1_recv_request_t* req,
struct mca_btl_base_module_t* btl,
mca_btl_base_segment_t* segments, mca_btl_base_segment_t* segments,
size_t num_segments); size_t num_segments);

Просмотреть файл

@ -121,13 +121,9 @@ static void mca_pml_ob1_rndv_completion(
} }
/* count bytes of user data actually delivered */ /* count bytes of user data actually delivered */
OPAL_THREAD_LOCK(&ompi_request_lock);
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_ob1_rendezvous_hdr_t)); MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_ob1_rendezvous_hdr_t));
OPAL_THREAD_UNLOCK(&ompi_request_lock);
#if MCA_PML_OB1_TIMESTAMPS
if(sendreq->req_pipeline_depth == 1) {
sendreq->t_send2 = get_profiler_timestamp();
}
#endif
/* return the descriptor */ /* return the descriptor */
mca_bml_base_free(bml_btl, descriptor); mca_bml_base_free(bml_btl, descriptor);
@ -140,6 +136,46 @@ static void mca_pml_ob1_rndv_completion(
} }
/**
* Completion of a get request.
*/
static void mca_pml_ob1_rget_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
int status)
{
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t*)des->des_cbdata;
/* count bytes of user data actually delivered and check for request completion */
OPAL_THREAD_LOCK(&ompi_request_lock);
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,des,0);
if (sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed) {
MCA_PML_OB1_SEND_REQUEST_COMPLETE(sendreq);
}
OPAL_THREAD_LOCK(&ompi_request_lock);
/* release resources */
btl->btl_free(btl,des);
}
/**
* Completion of a control message - return resources.
*/
static void mca_pml_ob1_ctl_completion(
mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* descriptor,
int status)
{
/* return the descriptor */
mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) descriptor->des_context;
mca_bml_base_free(bml_btl, descriptor);
}
/** /**
* Completion of additional fragments of a large message - may need * Completion of additional fragments of a large message - may need
* to schedule additional fragments. * to schedule additional fragments.
@ -162,20 +198,11 @@ static void mca_pml_ob1_frag_completion(
orte_errmgr.abort(); orte_errmgr.abort();
} }
/* count bytes of user data actually delivered */
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_ob1_frag_hdr_t));
#if MCA_PML_OB1_TIMESTAMPS
if(sendreq->req_pipeline_depth == 1) {
sendreq->t_send2 = get_profiler_timestamp();
}
#endif
/* return the descriptor */
mca_bml_base_free(bml_btl, descriptor);
/* check for request completion */ /* check for request completion */
OPAL_THREAD_LOCK(&ompi_request_lock); OPAL_THREAD_LOCK(&ompi_request_lock);
/* count bytes of user data actually delivered */
MCA_PML_OB1_SEND_REQUEST_SET_BYTES_DELIVERED(sendreq,descriptor,sizeof(mca_pml_ob1_frag_hdr_t));
if (OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-1) == 0 && if (OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,-1) == 0 &&
sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed) { sendreq->req_bytes_delivered == sendreq->req_send.req_bytes_packed) {
MCA_PML_OB1_SEND_REQUEST_COMPLETE(sendreq); MCA_PML_OB1_SEND_REQUEST_COMPLETE(sendreq);
@ -188,6 +215,9 @@ static void mca_pml_ob1_frag_completion(
mca_pml_ob1_send_request_schedule(sendreq); mca_pml_ob1_send_request_schedule(sendreq);
} }
/* return the descriptor */
mca_bml_base_free(bml_btl, descriptor);
/* check for pending requests */ /* check for pending requests */
MCA_PML_OB1_SEND_REQUEST_PROCESS_PENDING(); MCA_PML_OB1_SEND_REQUEST_PROCESS_PENDING();
} }
@ -250,12 +280,11 @@ int mca_pml_ob1_send_request_start_copy(
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_flags = 0;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;
/* update lengths */ /* update lengths */
segment->seg_len = sizeof(mca_pml_ob1_match_hdr_t) + max_data; segment->seg_len = sizeof(mca_pml_ob1_match_hdr_t) + max_data;
@ -270,6 +299,30 @@ int mca_pml_ob1_send_request_start_copy(
/* rendezvous header is required */ /* rendezvous header is required */
} else { } else {
struct mca_mpool_base_registration_t* registration = NULL;
struct mca_bml_base_btl_t* reg_btl = NULL;
bool do_rdma;
/* check to see if memory is registered */
sendreq->req_chunk = mca_mpool_base_find(sendreq->req_send.req_addr);
if(NULL != sendreq->req_chunk) {
struct mca_mpool_base_reg_mpool_t *reg = sendreq->req_chunk->mpools;
while(reg->mpool != NULL) {
if(NULL != (reg_btl = mca_bml_base_btl_array_find(&sendreq->bml_endpoint->btl_rdma,
(mca_btl_base_module_t*) reg->user_data))) {
registration = reg->mpool_registration;
break;
}
reg++;
}
}
/* if the data is already registed or leave_pinned is set - then we
* will attempt to do an rdma of the entire message.
*/
do_rdma = (reg_btl != NULL || mca_pml_ob1.leave_pinned) &&
ompi_convertor_need_buffers(&sendreq->req_send.req_convertor) == false;
if(do_rdma == false || (reg_btl != NULL && (reg_btl->btl_flags & MCA_BTL_FLAGS_GET) == 0)) {
int32_t free_after; int32_t free_after;
/* allocate space for hdr + first fragment */ /* allocate space for hdr + first fragment */
@ -279,15 +332,10 @@ int mca_pml_ob1_send_request_start_copy(
} }
segment = descriptor->des_src; segment = descriptor->des_src;
/* check to see if memory is registered */ /* rdma put is supported rather than rdma get */
sendreq->req_chunk = mca_mpool_base_find(sendreq->req_send.req_addr); if(do_rdma) {
max_data = 0; /* dont eager send any data */
} else {
/* if the buffer is not pinned and leave pinned is false we eagerly send
data to cover the cost of pinning the recv buffers on the peer */
if(size && NULL == sendreq->req_chunk && !mca_pml_ob1.leave_pinned) {
/* pack the data into the supplied buffer */
iov.iov_base = (void*)((unsigned char*)segment->seg_addr.pval + iov.iov_base = (void*)((unsigned char*)segment->seg_addr.pval +
sizeof(mca_pml_ob1_rendezvous_hdr_t)); sizeof(mca_pml_ob1_rendezvous_hdr_t));
iov.iov_len = size; iov.iov_len = size;
@ -302,28 +350,19 @@ int mca_pml_ob1_send_request_start_copy(
mca_bml_base_free(bml_btl , descriptor); mca_bml_base_free(bml_btl , descriptor);
return rc; return rc;
} }
if(max_data != size) {
opal_output(0, "[%s:%d] max_data (%lu) != size (%lu)\n", __FILE__,__LINE__,max_data,size);
}
}
/* if the buffer is pinned or leave pinned is true we do not eagerly send
any data */
else {
max_data = 0;
} }
/* build hdr */ /* build hdr */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0); hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
hdr->hdr_rndv.hdr_src_req.pval = sendreq; hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_rndv.hdr_frag_length = max_data;
/* update lengths with number of bytes actually packed */ /* update lengths with number of bytes actually packed */
segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data; segment->seg_len = sizeof(mca_pml_ob1_rendezvous_hdr_t) + max_data;
@ -331,17 +370,60 @@ int mca_pml_ob1_send_request_start_copy(
/* first fragment of a long message */ /* first fragment of a long message */
descriptor->des_cbfunc = mca_pml_ob1_rndv_completion; descriptor->des_cbfunc = mca_pml_ob1_rndv_completion;
/* the buffer is already pinned */
} else {
mca_btl_base_descriptor_t* src;
size_t i;
/* prepare descriptor */
if(reg_btl != NULL) {
bml_btl = reg_btl;
}
mca_bml_base_prepare_src(
bml_btl,
registration,
&sendreq->req_send.req_convertor,
0,
&sendreq->req_send.req_bytes_packed,
&src);
if(NULL == src) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
src->des_cbfunc = mca_pml_ob1_rget_completion;
src->des_cbdata = sendreq;
/* allocate space for hdr + segment list */
mca_bml_base_alloc(bml_btl, &descriptor,
sizeof(mca_pml_ob1_rget_hdr_t) + (sizeof(mca_btl_base_segment_t)*(src->des_src_cnt-1)));
if(NULL == descriptor) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
segment = descriptor->des_src;
/* build match header */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_rget.hdr_des.pval = src;
hdr->hdr_rget.hdr_seg_cnt = src->des_src_cnt;
for(i=0; i<src->des_src_cnt; i++)
hdr->hdr_rget.hdr_segs[i] = src->des_src[i];
descriptor->des_cbfunc = mca_pml_ob1_ctl_completion;
}
} }
descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
descriptor->des_cbdata = sendreq; descriptor->des_cbdata = sendreq;
/* send */ /* send */
#if MCA_PML_OB1_TIMESTAMPS rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML);
sendreq->t_start = get_profiler_timestamp();
#endif
rc = mca_bml_base_send(bml_btl,
descriptor,
MCA_BTL_TAG_PML);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
mca_bml_base_free(bml_btl, descriptor ); mca_bml_base_free(bml_btl, descriptor );
} }
@ -392,12 +474,11 @@ int mca_pml_ob1_send_request_start_prepare(
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_flags = 0;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence;
/* short message */ /* short message */
descriptor->des_cbfunc = mca_pml_ob1_match_completion; descriptor->des_cbfunc = mca_pml_ob1_match_completion;
@ -446,15 +527,13 @@ int mca_pml_ob1_send_request_start_prepare(
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0); hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_msg_length = sendreq->req_send.req_bytes_packed; hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_rndv.hdr_src_req.lval = 0; /* for VALGRIND/PURIFY - REPLACE WITH MACRO */
hdr->hdr_rndv.hdr_src_req.pval = sendreq; hdr->hdr_rndv.hdr_src_req.pval = sendreq;
hdr->hdr_rndv.hdr_frag_length = size;
/* first fragment of a long message */ /* first fragment of a long message */
descriptor->des_cbfunc = mca_pml_ob1_rndv_completion; descriptor->des_cbfunc = mca_pml_ob1_rndv_completion;
@ -464,10 +543,8 @@ int mca_pml_ob1_send_request_start_prepare(
descriptor->des_cbdata = sendreq; descriptor->des_cbdata = sendreq;
/* send */ /* send */
#if MCA_PML_OB1_TIMESTAMPS rc = mca_bml_base_send(
sendreq->t_start = get_profiler_timestamp(); bml_btl,
#endif
rc = mca_bml_base_send(bml_btl,
descriptor, descriptor,
MCA_BTL_TAG_PML); MCA_BTL_TAG_PML);
if(OMPI_SUCCESS != rc) { if(OMPI_SUCCESS != rc) {
@ -552,7 +629,6 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
hdr = (mca_pml_ob1_frag_hdr_t*)des->des_src->seg_addr.pval; hdr = (mca_pml_ob1_frag_hdr_t*)des->des_src->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_flags = 0;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FRAG; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FRAG;
hdr->hdr_frag_length = size;
hdr->hdr_frag_offset = sendreq->req_send_offset; hdr->hdr_frag_offset = sendreq->req_send_offset;
hdr->hdr_src_req.pval = sendreq; hdr->hdr_src_req.pval = sendreq;
hdr->hdr_dst_req = sendreq->req_recv; hdr->hdr_dst_req = sendreq->req_recv;
@ -575,10 +651,6 @@ int mca_pml_ob1_send_request_schedule(mca_pml_ob1_send_request_t* sendreq)
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock); OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
break; break;
} }
#if MCA_PML_OB1_TIMESTAMPS
if(bytes_remaining == 0)
sendreq->t_scheduled = get_profiler_timestamp();
#endif
mca_pml_ob1_progress(); mca_pml_ob1_progress();
} }
} while (OPAL_THREAD_ADD32(&sendreq->req_lock,-1) > 0); } while (OPAL_THREAD_ADD32(&sendreq->req_lock,-1) > 0);
@ -631,13 +703,6 @@ static void mca_pml_ob1_put_completion(
orte_errmgr.abort(); orte_errmgr.abort();
} }
#if MCA_PML_OB1_TIMESTAMPS
/* update statistics */
sendreq->t_fin[sendreq->t_fin_index++] = get_profiler_timestamp();
if(sendreq->t_fin_index >= MCA_PML_OB1_NUM_TSTAMPS)
sendreq->t_fin_index = 0;
#endif
/* check for request completion */ /* check for request completion */
OPAL_THREAD_LOCK(&ompi_request_lock); OPAL_THREAD_LOCK(&ompi_request_lock);
sendreq->req_bytes_delivered += frag->rdma_length; sendreq->req_bytes_delivered += frag->rdma_length;
@ -667,10 +732,7 @@ static void mca_pml_ob1_put_completion(
hdr = (mca_pml_ob1_fin_hdr_t*)fin->des_src->seg_addr.pval; hdr = (mca_pml_ob1_fin_hdr_t*)fin->des_src->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_flags = 0;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_FIN;
hdr->hdr_src = frag->rdma_hdr.hdr_rdma.hdr_src; hdr->hdr_des = frag->rdma_hdr.hdr_rdma.hdr_des;
hdr->hdr_dst = frag->rdma_hdr.hdr_rdma.hdr_dst;
hdr->hdr_rdma_offset = frag->rdma_hdr.hdr_rdma.hdr_rdma_offset;
hdr->hdr_rdma_length = frag->rdma_length;
/* queue request */ /* queue request */
rc = mca_bml_base_send( rc = mca_bml_base_send(
@ -752,12 +814,6 @@ void mca_pml_ob1_send_request_put(
} }
} }
#if MCA_PML_OB1_TIMESTAMPS
sendreq->t_pin[sendreq->t_pin_index++] = get_profiler_timestamp();
if(sendreq->t_pin_index >= MCA_PML_OB1_NUM_TSTAMPS)
sendreq->t_pin_index = 0;
#endif
/* setup descriptor */ /* setup descriptor */
ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset); ompi_convertor_set_position(&sendreq->req_send.req_convertor, &offset);
@ -782,13 +838,6 @@ void mca_pml_ob1_send_request_put(
des->des_cbfunc = mca_pml_ob1_put_completion; des->des_cbfunc = mca_pml_ob1_put_completion;
des->des_cbdata = frag; des->des_cbdata = frag;
#if MCA_PML_OB1_TIMESTAMPS
/* queue put */
sendreq->t_put[sendreq->t_put_index++] = get_profiler_timestamp();
if(sendreq->t_put_index >= MCA_PML_OB1_NUM_TSTAMPS)
sendreq->t_put_index = 0;
#endif
if(OMPI_SUCCESS != (rc = mca_bml_base_put(bml_btl, des))) { if(OMPI_SUCCESS != (rc = mca_bml_base_put(bml_btl, des))) {
if(rc == OMPI_ERR_OUT_OF_RESOURCE) { if(rc == OMPI_ERR_OUT_OF_RESOURCE) {
OPAL_THREAD_LOCK(&mca_pml_ob1.lock); OPAL_THREAD_LOCK(&mca_pml_ob1.lock);

Просмотреть файл

@ -45,19 +45,6 @@ struct mca_pml_ob1_send_request_t {
size_t req_bytes_delivered; size_t req_bytes_delivered;
size_t req_send_offset; size_t req_send_offset;
size_t req_rdma_offset; size_t req_rdma_offset;
#if MCA_PML_OB1_TIMESTAMPS
unsigned long long t_start;
unsigned long long t_send1;
unsigned long long t_send2;
unsigned long long t_scheduled;
unsigned long long t_pin[MCA_PML_OB1_NUM_TSTAMPS];
unsigned long long t_put[MCA_PML_OB1_NUM_TSTAMPS];
unsigned long long t_fin[MCA_PML_OB1_NUM_TSTAMPS];
int t_pin_index;
int t_put_index;
int t_fin_index;
#endif
}; };
typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t; typedef struct mca_pml_ob1_send_request_t mca_pml_ob1_send_request_t;
@ -113,47 +100,6 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
* Diagnostic output to trace rdma protocol timing * Diagnostic output to trace rdma protocol timing
*/ */
#if MCA_PML_OB1_TIMESTAMPS
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq) \
{ \
int i; \
opal_output(0, "[%d,%d,%d] src start, %llu\n", \
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_start); \
\
opal_output(0, "[%d,%d,%d] src send start, %llu\n", \
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_send1); \
\
opal_output(0, "[%d,%d,%d] src scheduled, %llu\n", \
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_scheduled); \
\
opal_output(0, "[%d,%d,%d] src send complete, %llu\n", \
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_send2); \
\
for(i=0; i<(sendreq)->t_pin_index; i++) \
opal_output(0, "[%d,%d,%d] src pin, %llu %llu\n", \
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_pin[i], \
(sendreq)->t_put[i] - (sendreq)->t_pin[i]); \
for(i=0; i<(sendreq)->t_put_index; i++) \
opal_output(0, "[%d,%d,%d] src put, %llu %llu\n", \
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_put[i], \
(sendreq)->t_fin[i] - (sendreq)->t_put[i]); \
for(i=0; i<(sendreq)->t_fin_index; i++) \
opal_output(0, "[%d,%d,%d] src fin, %llu\n", \
ORTE_NAME_ARGS(orte_process_info.my_name), (sendreq)->t_fin[i]); \
}
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq) \
{ \
sendreq->t_pin_index = 0; \
sendreq->t_put_index = 0; \
sendreq->t_fin_index = 0; \
}
#else
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq)
#define MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq)
#endif
/** /**
* Start a send request. * Start a send request.
@ -170,7 +116,6 @@ do {
break; \ break; \
} \ } \
\ \
MCA_PML_OB1_SEND_REQUEST_TSTAMPS_INIT(sendreq); \
sendreq->req_lock = 0; \ sendreq->req_lock = 0; \
sendreq->req_pipeline_depth = 0; \ sendreq->req_pipeline_depth = 0; \
sendreq->req_bytes_delivered = 0; \ sendreq->req_bytes_delivered = 0; \
@ -205,12 +150,11 @@ do {
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; \ hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; \
hdr->hdr_common.hdr_flags = 0; \ hdr->hdr_common.hdr_flags = 0; \
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; \ hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; \
hdr->hdr_match.hdr_contextid = sendreq->req_send.req_base.req_comm->c_contextid; \ hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; \
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \ hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \
hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; \ hdr->hdr_match.hdr_dst = sendreq->req_send.req_base.req_peer; \
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \ hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \
hdr->hdr_match.hdr_msg_length = 0; \ hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \
hdr->hdr_match.hdr_msg_seq = sendreq->req_send.req_base.req_sequence; \
\ \
/* short message */ \ /* short message */ \
descriptor->des_cbfunc = mca_pml_ob1_match_completion; \ descriptor->des_cbfunc = mca_pml_ob1_match_completion; \
@ -257,7 +201,6 @@ do {
(sendreq)->req_send.req_base.req_ompi.req_status._count = \ (sendreq)->req_send.req_base.req_ompi.req_status._count = \
(sendreq)->req_send.req_bytes_packed; \ (sendreq)->req_send.req_bytes_packed; \
(sendreq)->req_send.req_base.req_ompi.req_complete = true; \ (sendreq)->req_send.req_base.req_ompi.req_complete = true; \
MCA_PML_OB1_SEND_REQUEST_TSTAMPS_DUMP(sendreq); \
if(ompi_request_waiting) { \ if(ompi_request_waiting) { \
opal_condition_broadcast(&ompi_request_cond); \ opal_condition_broadcast(&ompi_request_cond); \
} \ } \