more work on leave pinned option
This commit was SVN r6155.
Этот коммит содержится в:
родитель
32210ea8fa
Коммит
628d4f0915
@ -338,11 +338,6 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src(
|
||||
0,
|
||||
registration);
|
||||
|
||||
/* frag->ret = VAPI_deregister_mr( */
|
||||
/* ib_bmi->nic, */
|
||||
/* registration->hndl */
|
||||
/* ); */
|
||||
|
||||
mca_mpool_base_remove((void*) registration->base);
|
||||
|
||||
ib_bmi->ib_pool->mpool_register(ib_bmi->ib_pool,
|
||||
@ -359,7 +354,7 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src(
|
||||
max_data,
|
||||
&registration);
|
||||
|
||||
if(frag->base.des_flags && MCA_BMI_DES_FLAGS_LEAVE_PINNED) {
|
||||
if(mca_bmi_ib_component.leave_pinned) {
|
||||
rc = mca_mpool_base_insert(iov.iov_base,
|
||||
iov.iov_len,
|
||||
ib_bmi->ib_pool,
|
||||
@ -421,6 +416,7 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_dst(
|
||||
frag->segment.seg_len = *size;
|
||||
frag->segment.seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;
|
||||
if(NULL!= registration){
|
||||
reg_len = (unsigned char*)registration->bound - (unsigned char*)frag->segment.seg_addr.pval + 1;
|
||||
if(frag->segment.seg_len > reg_len) {
|
||||
ib_bmi->ib_pool->mpool_deregister(
|
||||
ib_bmi->ib_pool,
|
||||
@ -428,11 +424,6 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_dst(
|
||||
0,
|
||||
registration);
|
||||
|
||||
/* frag->ret = VAPI_deregister_mr( */
|
||||
/* ib_bmi->nic, */
|
||||
/* registration->hndl */
|
||||
/* ); */
|
||||
|
||||
mca_mpool_base_remove((void*) registration->base);
|
||||
|
||||
ib_bmi->ib_pool->mpool_register(ib_bmi->ib_pool,
|
||||
@ -449,7 +440,7 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_dst(
|
||||
*size,
|
||||
&registration);
|
||||
|
||||
if(frag->base.des_flags && MCA_BMI_DES_FLAGS_LEAVE_PINNED) {
|
||||
if(mca_bmi_ib_component.leave_pinned) {
|
||||
rc = mca_mpool_base_insert(frag->segment.seg_addr.pval,
|
||||
*size,
|
||||
ib_bmi->ib_pool,
|
||||
|
@ -101,6 +101,7 @@ static inline int mca_bmi_ib_param_register_int(
|
||||
int mca_bmi_ib_component_open(void)
|
||||
{
|
||||
|
||||
int param, value;
|
||||
|
||||
/* initialize state */
|
||||
mca_bmi_ib_component.ib_num_bmis=0;
|
||||
@ -129,15 +130,13 @@ int mca_bmi_ib_component_open(void)
|
||||
mca_bmi_ib_param_register_int ("exclusivity", 0);
|
||||
mca_bmi_ib_module.super.bmi_eager_limit =
|
||||
mca_bmi_ib_param_register_int ("eager_limit",
|
||||
(64*1024
|
||||
- sizeof(mca_bmi_ib_header_t)));
|
||||
(64*1024)) - sizeof(mca_bmi_ib_header_t);
|
||||
mca_bmi_ib_module.super.bmi_min_send_size =
|
||||
mca_bmi_ib_param_register_int ("min_send_size",
|
||||
(64*1024
|
||||
- sizeof(mca_bmi_ib_header_t)));
|
||||
(64*1024))- sizeof(mca_bmi_ib_header_t);
|
||||
mca_bmi_ib_module.super.bmi_max_send_size =
|
||||
mca_bmi_ib_param_register_int ("max_send_size", 128*1024 -
|
||||
sizeof(mca_bmi_ib_header_t));
|
||||
mca_bmi_ib_param_register_int ("max_send_size", (128*1024)) - sizeof(mca_bmi_ib_header_t);
|
||||
|
||||
mca_bmi_ib_module.ib_pin_min =
|
||||
mca_bmi_ib_param_register_int("ib_pin_min", 128*1024);
|
||||
mca_bmi_ib_module.ib_cq_size =
|
||||
@ -195,10 +194,10 @@ int mca_bmi_ib_component_open(void)
|
||||
mca_bmi_ib_module.super.bmi_flags =
|
||||
mca_bmi_ib_param_register_int("flags",
|
||||
MCA_BMI_FLAGS_RDMA);
|
||||
|
||||
mca_bmi_ib_component.leave_pinned =
|
||||
mca_bmi_ib_param_register_int("leave_pinned",
|
||||
0);
|
||||
|
||||
param = mca_base_param_find("mpi", NULL, "leave_pinned");
|
||||
mca_base_param_lookup_int(param, &value);
|
||||
mca_bmi_ib_component.leave_pinned = value;
|
||||
|
||||
|
||||
|
||||
|
@ -378,41 +378,6 @@ int mca_mpool_base_free(void * base)
|
||||
struct mca_mpool_base_chunk_t * mca_mpool_base_find(void * base)
|
||||
{
|
||||
return (mca_mpool_base_chunk_t *)
|
||||
ompi_rb_tree_find(&mca_mpool_base_tree, &base);
|
||||
ompi_rb_tree_find(&mca_mpool_base_tree, &base);
|
||||
}
|
||||
|
||||
|
||||
/* int mca_bmi_ib_tree_node_compare_range(void * key1, void * key2) */
|
||||
/* { */
|
||||
/* if(((mca_mpool_base_key_t *) key1)->bottom < */
|
||||
/* ((mca_mpool_base_key_t *) key2)->bottom) */
|
||||
/* { */
|
||||
/* return -1; */
|
||||
/* } */
|
||||
/* else if((((mca_mpool_base_key_t *) key1)->bottom + ((mca_mpool_base_key_t *) key1)->length) > */
|
||||
/* ((mca_mpool_base_key_t *) key2)->top) */
|
||||
/* { */
|
||||
/* return 1; */
|
||||
/* } */
|
||||
/* else */
|
||||
/* { */
|
||||
/* return 0; */
|
||||
/* } */
|
||||
/* } */
|
||||
|
||||
|
||||
/* /\** */
|
||||
/* * Searches the mpool to see if it has allocated the memory that is passed in. */
|
||||
/* * If so it returns an array of mpools the memory is registered with. */
|
||||
/* * */
|
||||
/* * @param base pointer to the memory to lookup */
|
||||
/* * */
|
||||
/* * @retval NULL if the memory is not in any mpool */
|
||||
/* * @retval pointer to an array of type mca_mpool_base_reg_mpool_t */
|
||||
/* *\/ */
|
||||
/* struct mca_mpool_base_chunk_t * mca_mpool_base_find_range(void * base) */
|
||||
/* { */
|
||||
/* return (mca_mpool_base_chunk_t *) */
|
||||
/* ompi_rb_tree_find(&mca_mpool_base_tree, &base); */
|
||||
/* } */
|
||||
|
||||
|
@ -60,6 +60,7 @@ struct mca_pml_ob1_t {
|
||||
size_t rdma_offset; /* offset at which we attempt to initiate rdma */
|
||||
size_t send_pipeline_depth;
|
||||
size_t recv_pipeline_depth;
|
||||
bool leave_pinned;
|
||||
|
||||
/* lock queue access */
|
||||
ompi_mutex_t lock;
|
||||
|
@ -77,6 +77,7 @@ static inline int mca_pml_ob1_param_register_int(
|
||||
|
||||
int mca_pml_ob1_component_open(void)
|
||||
{
|
||||
int param, value;
|
||||
OBJ_CONSTRUCT(&mca_pml_ob1.lock, ompi_mutex_t);
|
||||
|
||||
/* requests */
|
||||
@ -116,6 +117,11 @@ int mca_pml_ob1_component_open(void)
|
||||
mca_pml_ob1_param_register_int("recv_pipeline_depth", 3);
|
||||
mca_pml_ob1.rdma_offset =
|
||||
mca_pml_ob1_param_register_int("rdma_offset", 1024*1024);
|
||||
|
||||
mca_base_param_register_int("mpi", NULL, "leave_pinned", "leave_pinned", 0);
|
||||
param = mca_base_param_find("mpi", NULL, "leave_pinned");
|
||||
mca_base_param_lookup_int(param, &value);
|
||||
mca_pml_ob1.leave_pinned = value;
|
||||
|
||||
return mca_bmi_base_open();
|
||||
}
|
||||
|
@ -164,7 +164,7 @@ static void mca_pml_ob1_recv_request_ack(
|
||||
* - size is larger than the rdma threshold
|
||||
* - rdma devices are available
|
||||
*/
|
||||
if(recvreq->req_mpool == NULL) {
|
||||
if(NULL == recvreq->req_mpool && !mca_pml_ob1.leave_pinned) {
|
||||
if(recvreq->req_recv.req_bytes_packed > mca_pml_ob1.rdma_offset &&
|
||||
mca_pml_ob1_ep_array_get_size(&proc->bmi_rdma) &&
|
||||
ompi_convertor_need_buffers(&recvreq->req_recv.req_convertor) == 0) {
|
||||
@ -180,7 +180,11 @@ static void mca_pml_ob1_recv_request_ack(
|
||||
ack->hdr_rdma_offset = recvreq->req_recv.req_bytes_packed;
|
||||
}
|
||||
}
|
||||
|
||||
else{
|
||||
recvreq->req_rdma_offset = hdr->hdr_frag_length;
|
||||
ack->hdr_rdma_offset = hdr->hdr_frag_length;
|
||||
}
|
||||
|
||||
/* initialize descriptor */
|
||||
des->des_flags |= MCA_BMI_DES_FLAGS_PRIORITY;
|
||||
des->des_cbfunc = mca_pml_ob1_send_ctl_complete;
|
||||
@ -345,7 +349,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
||||
* registered with. Otherwise, schedule round-robin across the
|
||||
* available RDMA nics.
|
||||
*/
|
||||
if(recvreq->req_mpool == NULL) {
|
||||
if(recvreq->req_mpool == NULL && !mca_pml_ob1.leave_pinned) {
|
||||
ep = mca_pml_ob1_ep_array_get_next(&proc->bmi_rdma);
|
||||
|
||||
/* if there is only one bmi available or the size is less than
|
||||
@ -384,23 +388,34 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
|
||||
recvreq->pin2[recvreq->pin_index] = get_profiler_timestamp();
|
||||
#endif
|
||||
} else {
|
||||
|
||||
/* find the endpoint corresponding to this bmi and schedule the entire message */
|
||||
ep = mca_pml_ob1_ep_array_find(&proc->bmi_rdma, (mca_bmi_base_module_t*) recvreq->req_mpool->user_data);
|
||||
struct mca_mpool_base_registration_t * reg;
|
||||
size = bytes_remaining;
|
||||
|
||||
/* prepare a descriptor for RDMA */
|
||||
ompi_convertor_set_position(&recvreq->req_recv.req_convertor, &recvreq->req_rdma_offset);
|
||||
|
||||
if(NULL != recvreq->req_mpool){
|
||||
/* find the endpoint corresponding to this bmi and schedule the entire message */
|
||||
ep = mca_pml_ob1_ep_array_find(&proc->bmi_rdma, (mca_bmi_base_module_t*) recvreq->req_mpool->user_data);
|
||||
reg = recvreq->req_mpool->mpool_registration;
|
||||
|
||||
|
||||
}
|
||||
else{
|
||||
ep = mca_pml_ob1_ep_array_get_next(&proc->bmi_rdma);
|
||||
reg = NULL;
|
||||
}
|
||||
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
recvreq->pin1[recvreq->pin_index] = get_profiler_timestamp();
|
||||
#endif
|
||||
dst = ep->bmi_prepare_dst(
|
||||
ep->bmi,
|
||||
ep->bmi_endpoint,
|
||||
recvreq->req_mpool->mpool_registration,
|
||||
&recvreq->req_recv.req_convertor,
|
||||
0,
|
||||
&size);
|
||||
ep->bmi,
|
||||
ep->bmi_endpoint,
|
||||
reg,
|
||||
&recvreq->req_recv.req_convertor,
|
||||
0,
|
||||
&size);
|
||||
#if MCA_PML_OB1_TIMESTAMPS
|
||||
recvreq->pin2[recvreq->pin_index] = get_profiler_timestamp();
|
||||
#endif
|
||||
|
@ -297,22 +297,31 @@ int mca_pml_ob1_send_request_start(
|
||||
/* check to see if memory is registered */
|
||||
sendreq->req_chunk = mca_mpool_base_find(sendreq->req_send.req_addr);
|
||||
|
||||
/* pack the data into the supplied buffer */
|
||||
iov.iov_base = (void*)((unsigned char*)segment->seg_addr.pval +
|
||||
sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
||||
iov.iov_len = size;
|
||||
iov_count = 1;
|
||||
max_data = size;
|
||||
if((rc = ompi_convertor_pack(
|
||||
&sendreq->req_send.req_convertor,
|
||||
&iov,
|
||||
&iov_count,
|
||||
&max_data,
|
||||
&free_after)) < 0) {
|
||||
endpoint->bmi_free(endpoint->bmi, descriptor);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* if the buffer is not pinned and leave pinned is false we eagerly send
|
||||
data to cover the cost of pinning the recv buffers on the peer */
|
||||
if(NULL == sendreq->req_chunk && !mca_pml_ob1.leave_pinned){
|
||||
/* pack the data into the supplied buffer */
|
||||
iov.iov_base = (void*)((unsigned char*)segment->seg_addr.pval +
|
||||
sizeof(mca_pml_ob1_rendezvous_hdr_t));
|
||||
iov.iov_len = size;
|
||||
iov_count = 1;
|
||||
max_data = size;
|
||||
if((rc = ompi_convertor_pack(
|
||||
&sendreq->req_send.req_convertor,
|
||||
&iov,
|
||||
&iov_count,
|
||||
&max_data,
|
||||
&free_after)) < 0) {
|
||||
endpoint->bmi_free(endpoint->bmi, descriptor);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
/* if the buffer is pinned or leave pinned is true we do not eagerly send
|
||||
any data */
|
||||
else {
|
||||
max_data = 0;
|
||||
}
|
||||
/* build hdr */
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
|
||||
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user