
more work on leave pinned option

This commit was SVN r6155.
This commit is contained in:
Galen Shipman 2005-06-23 19:24:44 +00:00
parent 32210ea8fa
commit 628d4f0915
7 changed files: 71 additions and 85 deletions

View file

@@ -338,11 +338,6 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src(
0,
registration);
/* frag->ret = VAPI_deregister_mr( */
/* ib_bmi->nic, */
/* registration->hndl */
/* ); */
mca_mpool_base_remove((void*) registration->base);
ib_bmi->ib_pool->mpool_register(ib_bmi->ib_pool,
@@ -359,7 +354,7 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src(
max_data,
&registration);
if(frag->base.des_flags && MCA_BMI_DES_FLAGS_LEAVE_PINNED) {
if(mca_bmi_ib_component.leave_pinned) {
rc = mca_mpool_base_insert(iov.iov_base,
iov.iov_len,
ib_bmi->ib_pool,
@@ -421,6 +416,7 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_dst(
frag->segment.seg_len = *size;
frag->segment.seg_addr.pval = convertor->pBaseBuf + convertor->bConverted;
if(NULL!= registration){
reg_len = (unsigned char*)registration->bound - (unsigned char*)frag->segment.seg_addr.pval + 1;
if(frag->segment.seg_len > reg_len) {
ib_bmi->ib_pool->mpool_deregister(
ib_bmi->ib_pool,
@@ -428,11 +424,6 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_dst(
0,
registration);
/* frag->ret = VAPI_deregister_mr( */
/* ib_bmi->nic, */
/* registration->hndl */
/* ); */
mca_mpool_base_remove((void*) registration->base);
ib_bmi->ib_pool->mpool_register(ib_bmi->ib_pool,
@@ -449,7 +440,7 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_dst(
*size,
&registration);
if(frag->base.des_flags && MCA_BMI_DES_FLAGS_LEAVE_PINNED) {
if(mca_bmi_ib_component.leave_pinned) {
rc = mca_mpool_base_insert(frag->segment.seg_addr.pval,
*size,
ib_bmi->ib_pool,
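
The two hunks above change mca_bmi_ib_prepare_src() and mca_bmi_ib_prepare_dst() the same way: the dead VAPI_deregister_mr() comment block is dropped, and caching a registration in the memory pool is now gated on the component-wide leave_pinned setting rather than the old `if(frag->base.des_flags && MCA_BMI_DES_FLAGS_LEAVE_PINNED)` test, which uses a logical AND where a bitwise flag test was presumably intended. Below is a minimal standalone sketch of the new gating, using simplified stand-in types rather than the real Open MPI structures:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for the real registration/component types. */
typedef struct { void *base; void *bound; } registration_t;
typedef struct { bool leave_pinned; } component_t;

static component_t component = { true };

/* Register [addr, addr+len) and cache the registration for reuse only
 * when the component-wide leave_pinned option is enabled. */
static int prepare_region(void *addr, size_t len, registration_t *reg)
{
    /* ... the VAPI/mpool registration of the region would happen here ... */
    reg->base  = addr;
    reg->bound = (char *)addr + len - 1;

    if (component.leave_pinned) {
        /* keep the registration around for later sends/receives,
         * analogous to mca_mpool_base_insert() in the diff above */
        printf("caching registration [%p, %p]\n", reg->base, reg->bound);
    }
    return 0;
}

int main(void)
{
    char buf[4096];
    registration_t reg;
    return prepare_region(buf, sizeof buf, &reg);
}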

View file

@@ -101,6 +101,7 @@ static inline int mca_bmi_ib_param_register_int(
int mca_bmi_ib_component_open(void)
{
int param, value;
/* initialize state */
mca_bmi_ib_component.ib_num_bmis=0;
@@ -129,15 +130,13 @@ int mca_bmi_ib_component_open(void)
mca_bmi_ib_param_register_int ("exclusivity", 0);
mca_bmi_ib_module.super.bmi_eager_limit =
mca_bmi_ib_param_register_int ("eager_limit",
(64*1024
- sizeof(mca_bmi_ib_header_t)));
(64*1024)) - sizeof(mca_bmi_ib_header_t);
mca_bmi_ib_module.super.bmi_min_send_size =
mca_bmi_ib_param_register_int ("min_send_size",
(64*1024
- sizeof(mca_bmi_ib_header_t)));
(64*1024))- sizeof(mca_bmi_ib_header_t);
mca_bmi_ib_module.super.bmi_max_send_size =
mca_bmi_ib_param_register_int ("max_send_size", 128*1024 -
sizeof(mca_bmi_ib_header_t));
mca_bmi_ib_param_register_int ("max_send_size", (128*1024)) - sizeof(mca_bmi_ib_header_t);
mca_bmi_ib_module.ib_pin_min =
mca_bmi_ib_param_register_int("ib_pin_min", 128*1024);
mca_bmi_ib_module.ib_cq_size =
@@ -195,10 +194,10 @@ int mca_bmi_ib_component_open(void)
mca_bmi_ib_module.super.bmi_flags =
mca_bmi_ib_param_register_int("flags",
MCA_BMI_FLAGS_RDMA);
mca_bmi_ib_component.leave_pinned =
mca_bmi_ib_param_register_int("leave_pinned",
0);
param = mca_base_param_find("mpi", NULL, "leave_pinned");
mca_base_param_lookup_int(param, &value);
mca_bmi_ib_component.leave_pinned = value;
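
With this hunk the ib component stops registering its own "leave_pinned" parameter and instead reads the MPI-level one (registered by pml_ob1 further down in this commit) via mca_base_param_find() and mca_base_param_lookup_int(). The standalone sketch below mimics that register-in-one-place, look-up-in-another pattern with hypothetical stub functions, not the real mca_base_param API:

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins for mca_base_param_register_int()/_find()/_lookup_int():
 * a tiny global table keyed by "frame_name". */
static struct { char key[64]; int value; } params[16];
static int nparams;

static int param_register_int(const char *frame, const char *name, int dflt)
{
    snprintf(params[nparams].key, sizeof params[nparams].key, "%s_%s", frame, name);
    params[nparams].value = dflt;
    return nparams++;
}

static int param_find(const char *frame, const char *name)
{
    char key[64];
    int i;
    snprintf(key, sizeof key, "%s_%s", frame, name);
    for (i = 0; i < nparams; i++)
        if (strcmp(params[i].key, key) == 0)
            return i;
    return -1;
}

static void param_lookup_int(int idx, int *out) { *out = params[idx].value; }

int main(void)
{
    int param, value;

    /* the pml registers the MPI-level option once ... */
    param_register_int("mpi", "leave_pinned", 0);

    /* ... and the ib component looks up that same value instead of
     * registering a private "leave_pinned" parameter of its own */
    param = param_find("mpi", "leave_pinned");
    param_lookup_int(param, &value);
    printf("leave_pinned = %d\n", value);
    return 0;
}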

View file

@@ -378,41 +378,6 @@ int mca_mpool_base_free(void * base)
struct mca_mpool_base_chunk_t * mca_mpool_base_find(void * base)
{
return (mca_mpool_base_chunk_t *)
ompi_rb_tree_find(&mca_mpool_base_tree, &base);
ompi_rb_tree_find(&mca_mpool_base_tree, &base);
}
/* int mca_bmi_ib_tree_node_compare_range(void * key1, void * key2) */
/* { */
/* if(((mca_mpool_base_key_t *) key1)->bottom < */
/* ((mca_mpool_base_key_t *) key2)->bottom) */
/* { */
/* return -1; */
/* } */
/* else if((((mca_mpool_base_key_t *) key1)->bottom + ((mca_mpool_base_key_t *) key1)->length) > */
/* ((mca_mpool_base_key_t *) key2)->top) */
/* { */
/* return 1; */
/* } */
/* else */
/* { */
/* return 0; */
/* } */
/* } */
/* /\** */
/* * Searches the mpool to see if it has allocated the memory that is passed in. */
/* * If so it returns an array of mpools the memory is registered with. */
/* * */
/* * @param base pointer to the memory to lookup */
/* * */
/* * @retval NULL if the memory is not in any mpool */
/* * @retval pointer to an array of type mca_mpool_base_reg_mpool_t */
/* *\/ */
/* struct mca_mpool_base_chunk_t * mca_mpool_base_find_range(void * base) */
/* { */
/* return (mca_mpool_base_chunk_t *) */
/* ompi_rb_tree_find(&mca_mpool_base_tree, &base); */
/* } */

View file

@@ -60,6 +60,7 @@ struct mca_pml_ob1_t {
size_t rdma_offset; /* offset at which we attempt to initiate rdma */
size_t send_pipeline_depth;
size_t recv_pipeline_depth;
bool leave_pinned;
/* lock queue access */
ompi_mutex_t lock;

View file

@@ -77,6 +77,7 @@ static inline int mca_pml_ob1_param_register_int(
int mca_pml_ob1_component_open(void)
{
int param, value;
OBJ_CONSTRUCT(&mca_pml_ob1.lock, ompi_mutex_t);
/* requests */
@@ -116,6 +117,11 @@ int mca_pml_ob1_component_open(void)
mca_pml_ob1_param_register_int("recv_pipeline_depth", 3);
mca_pml_ob1.rdma_offset =
mca_pml_ob1_param_register_int("rdma_offset", 1024*1024);
mca_base_param_register_int("mpi", NULL, "leave_pinned", "leave_pinned", 0);
param = mca_base_param_find("mpi", NULL, "leave_pinned");
mca_base_param_lookup_int(param, &value);
mca_pml_ob1.leave_pinned = value;
return mca_bmi_base_open();
}

View file

@@ -164,7 +164,7 @@ static void mca_pml_ob1_recv_request_ack(
* - size is larger than the rdma threshold
* - rdma devices are available
*/
if(recvreq->req_mpool == NULL) {
if(NULL == recvreq->req_mpool && !mca_pml_ob1.leave_pinned) {
if(recvreq->req_recv.req_bytes_packed > mca_pml_ob1.rdma_offset &&
mca_pml_ob1_ep_array_get_size(&proc->bmi_rdma) &&
ompi_convertor_need_buffers(&recvreq->req_recv.req_convertor) == 0) {
@@ -180,7 +180,11 @@ static void mca_pml_ob1_recv_request_ack(
ack->hdr_rdma_offset = recvreq->req_recv.req_bytes_packed;
}
}
else{
recvreq->req_rdma_offset = hdr->hdr_frag_length;
ack->hdr_rdma_offset = hdr->hdr_frag_length;
}
/* initialize descriptor */
des->des_flags |= MCA_BMI_DES_FLAGS_PRIORITY;
des->des_cbfunc = mca_pml_ob1_send_ctl_complete;
@@ -345,7 +349,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
* registed with. Otherwise, schedule round-robin across the
* available RDMA nics.
*/
if(recvreq->req_mpool == NULL) {
if(recvreq->req_mpool == NULL && !mca_pml_ob1.leave_pinned) {
ep = mca_pml_ob1_ep_array_get_next(&proc->bmi_rdma);
/* if there is only one bmi available or the size is less than
@@ -384,23 +388,34 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq)
recvreq->pin2[recvreq->pin_index] = get_profiler_timestamp();
#endif
} else {
/* find the endpoint corresponding to this bmi and schedule the entire message */
ep = mca_pml_ob1_ep_array_find(&proc->bmi_rdma, (mca_bmi_base_module_t*) recvreq->req_mpool->user_data);
struct mca_mpool_base_registration_t * reg;
size = bytes_remaining;
/* prepare a descriptor for RDMA */
ompi_convertor_set_position(&recvreq->req_recv.req_convertor, &recvreq->req_rdma_offset);
if(NULL != recvreq->req_mpool){
/* find the endpoint corresponding to this bmi and schedule the entire message */
ep = mca_pml_ob1_ep_array_find(&proc->bmi_rdma, (mca_bmi_base_module_t*) recvreq->req_mpool->user_data);
reg = recvreq->req_mpool->mpool_registration;
}
else{
ep = mca_pml_ob1_ep_array_get_next(&proc->bmi_rdma);
reg = NULL;
}
#if MCA_PML_OB1_TIMESTAMPS
recvreq->pin1[recvreq->pin_index] = get_profiler_timestamp();
#endif
dst = ep->bmi_prepare_dst(
ep->bmi,
ep->bmi_endpoint,
recvreq->req_mpool->mpool_registration,
&recvreq->req_recv.req_convertor,
0,
&size);
ep->bmi,
ep->bmi_endpoint,
reg,
&recvreq->req_recv.req_convertor,
0,
&size);
#if MCA_PML_OB1_TIMESTAMPS
recvreq->pin2[recvreq->pin_index] = get_profiler_timestamp();
#endif
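
In the scheduling hunk above, the endpoint and registration handed to bmi_prepare_dst() now depend on whether the receive buffer is already known to the memory pool: if req_mpool is set, the cached registration and the BMI it belongs to are reused and the whole message is scheduled over that device; otherwise (the leave_pinned case with an unregistered buffer) the next RDMA endpoint is picked round-robin and prepare_dst is called with a NULL registration so the BMI pins the memory itself. A compact standalone sketch of that selection, with hypothetical simplified types:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical simplified stand-ins for the endpoint/registration types. */
typedef struct { const char *name; } endpoint_t;
typedef struct { endpoint_t *owner; } registration_t;

typedef struct {
    registration_t *req_reg;   /* non-NULL if the user buffer is already pinned */
} recv_request_t;

static endpoint_t eps[2] = { { "ib0" }, { "ib1" } };
static int next_ep;

/* Pick the endpoint and registration to hand to bmi_prepare_dst(). */
static endpoint_t *select_rdma_endpoint(recv_request_t *req, registration_t **reg)
{
    if (req->req_reg != NULL) {
        /* buffer already registered: reuse the registration and the
         * BMI it belongs to */
        *reg = req->req_reg;
        return req->req_reg->owner;
    }
    /* unregistered buffer under leave_pinned: round-robin across RDMA
     * endpoints and let prepare_dst register (and cache) the memory */
    *reg = NULL;
    return &eps[next_ep++ % 2];
}

int main(void)
{
    recv_request_t req = { NULL };
    registration_t *reg;
    endpoint_t *ep = select_rdma_endpoint(&req, &reg);
    printf("endpoint=%s reg=%p\n", ep->name, (void *)reg);
    return 0;
}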

View file

@@ -297,22 +297,31 @@ int mca_pml_ob1_send_request_start(
/* check to see if memory is registered */
sendreq->req_chunk = mca_mpool_base_find(sendreq->req_send.req_addr);
/* pack the data into the supplied buffer */
iov.iov_base = (void*)((unsigned char*)segment->seg_addr.pval +
sizeof(mca_pml_ob1_rendezvous_hdr_t));
iov.iov_len = size;
iov_count = 1;
max_data = size;
if((rc = ompi_convertor_pack(
&sendreq->req_send.req_convertor,
&iov,
&iov_count,
&max_data,
&free_after)) < 0) {
endpoint->bmi_free(endpoint->bmi, descriptor);
return rc;
}
/* if the buffer is not pinned and leave pinned is false we eagerly send
data to cover the cost of pinning the recv buffers on the peer */
if(NULL == sendreq->req_chunk && !mca_pml_ob1.leave_pinned){
/* pack the data into the supplied buffer */
iov.iov_base = (void*)((unsigned char*)segment->seg_addr.pval +
sizeof(mca_pml_ob1_rendezvous_hdr_t));
iov.iov_len = size;
iov_count = 1;
max_data = size;
if((rc = ompi_convertor_pack(
&sendreq->req_send.req_convertor,
&iov,
&iov_count,
&max_data,
&free_after)) < 0) {
endpoint->bmi_free(endpoint->bmi, descriptor);
return rc;
}
}
/* if the buffer is pinned or leave pinned is true we do not eagerly send
any data */
else {
max_data = 0;
}
/* build hdr */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = (sendreq->req_chunk != NULL ? MCA_PML_OB1_HDR_FLAGS_PIN : 0);
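
The rendezvous start above now packs eager data behind the header only when the send buffer is not already registered and leave_pinned is off; in the pinned (or leave_pinned) case max_data is forced to 0 so the payload moves entirely by RDMA after the receiver's ACK. A small standalone sketch of that decision (hypothetical helper, not the real mca_pml_ob1_send_request_start() signature):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Decide how many bytes to pack eagerly with the rendezvous header. */
static size_t eager_bytes(bool buffer_registered, bool leave_pinned, size_t size)
{
    if (!buffer_registered && !leave_pinned) {
        /* eagerly send data to hide the cost of pinning the receive
         * buffers on the peer */
        return size;
    }
    /* buffer pinned, or leave_pinned enabled: send no payload and let
     * the whole message go by RDMA once the receiver acks */
    return 0;
}

int main(void)
{
    printf("%zu\n", eager_bytes(false, false, 4096)); /* prints 4096 */
    printf("%zu\n", eager_bytes(true,  true,  4096)); /* prints 0 */
    return 0;
}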