1
1

Fix a bug on 32-bit systems introduced by r26626. This fix ensures that all supported BTLs set the lval in prepare_src/dst when preparing a put or get segment (with the exception of wv -- shiqing will need to help bring that one up to date with r26626). This fix also ensures consistent use of lval in put and get for both local and remote segments.

This commit was SVN r26793.

The following SVN revision numbers were found above:
  r26626 --> open-mpi/ompi@249066e06d
Этот коммит содержится в:
Nathan Hjelm 2012-07-13 21:19:16 +00:00
родитель e1ced2320a
Коммит 4d1920ee87
10 изменённых файлов: 66 добавлений и 44 удалений

Просмотреть файл

@ -1041,6 +1041,8 @@ int mca_btl_openib_free(
to_com_frag(des)->sg_entry.addr =
(uint64_t)(uintptr_t)to_send_frag(des)->hdr;
to_send_frag(des)->coalesced_length = 0;
to_base_frag(des)->segment.base.seg_addr.pval =
to_send_frag(des)->hdr + 1;
assert(!opal_list_get_size(&to_send_frag(des)->coalesced_frags));
/* fall through */
case MCA_BTL_OPENIB_FRAG_SEND_USER:
@ -1094,6 +1096,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data = *size;
void *ptr;
int rc;
openib_btl = (mca_btl_openib_module_t*)btl;
@ -1133,7 +1136,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
to_base_frag(frag)->base.order = order;
to_base_frag(frag)->base.des_flags = flags;
to_base_frag(frag)->segment.base.seg_len = max_data;
to_base_frag(frag)->segment.base.seg_addr.pval = iov.iov_base;
to_base_frag(frag)->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
to_base_frag(frag)->segment.key = frag->sg_entry.lkey;
assert(MCA_BTL_NO_ORDER == order);
@ -1151,17 +1154,28 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
max_data = btl->btl_max_send_size - reserve;
}
frag = (mca_btl_openib_com_frag_t*)(reserve ?
mca_btl_openib_alloc(btl, endpoint, order, max_data + reserve,
flags) :
ib_frag_alloc(openib_btl, max_data, order, flags));
if (OPAL_UNLIKELY(0 == reserve)) {
frag = (mca_btl_openib_com_frag_t *) ib_frag_alloc(openib_btl, max_data, order, flags);
if(NULL == frag)
return NULL;
/* NTH: this frag will be used for either a get or put so we need to set the lval to be
consistent with the usage in get and put. The pval will be restored in mca_btl_openib_free */
ptr = to_base_frag(frag)->segment.base.seg_addr.pval;
to_base_frag(frag)->segment.base.seg_addr.lval =
(uint64_t)(uintptr_t) ptr;
} else {
frag =
(mca_btl_openib_com_frag_t *) mca_btl_openib_alloc(btl, endpoint, order,
max_data + reserve, flags);
if(NULL == frag)
return NULL;
ptr = to_base_frag(frag)->segment.base.seg_addr.pval;
}
iov.iov_len = max_data;
iov.iov_base = (IOVBASE_TYPE *) ( (unsigned char*)to_base_frag(frag)->segment.base.seg_addr.pval +
reserve );
iov.iov_base = (IOVBASE_TYPE *) ( (unsigned char*) ptr + reserve );
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
*size = max_data;
@ -1250,7 +1264,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (uint64_t)(uintptr_t)buffer;
to_base_frag(frag)->segment.base.seg_addr.pval = buffer;
to_base_frag(frag)->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) buffer;
to_base_frag(frag)->segment.base.seg_len = *size;
to_base_frag(frag)->segment.key = openib_reg->mr->rkey;
to_base_frag(frag)->base.order = order;
@ -1649,8 +1663,7 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
frag->sr_desc.wr.rdma.remote_addr = rem_addr;
frag->sr_desc.wr.rdma.rkey = rkey;
to_com_frag(frag)->sg_entry.addr =
(uint64_t)(uintptr_t)src_seg->base.seg_addr.pval;
to_com_frag(frag)->sg_entry.addr = src_seg->base.seg_addr.lval;
to_com_frag(frag)->sg_entry.length = src_seg->base.seg_len;
to_com_frag(frag)->endpoint = ep;
#if HAVE_XRC
@ -1732,8 +1745,7 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
frag->sr_desc.wr.rdma.remote_addr = rem_addr;
frag->sr_desc.wr.rdma.rkey = rkey;
to_com_frag(frag)->sg_entry.addr =
(uint64_t)(uintptr_t)dst_seg->base.seg_addr.pval;
to_com_frag(frag)->sg_entry.addr = dst_seg->base.seg_addr.lval;
to_com_frag(frag)->sg_entry.length = dst_seg->base.seg_len;
to_com_frag(frag)->endpoint = ep;

Просмотреть файл

@ -1884,7 +1884,7 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
"eager RDMA and progress threads", true);
}
mpool_resources.pool_name = "verbs";
asprintf (&mpool_resources.pool_name, "verbs.%" PRIu64, device->ib_dev_attr.node_guid);
mpool_resources.reg_data = (void*)device;
mpool_resources.sizeof_reg = sizeof(mca_btl_openib_reg_t);
mpool_resources.register_mem = openib_reg_mr;

Просмотреть файл

@ -229,7 +229,7 @@ mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
MCA_BTL_SELF_FRAG_RETURN_RDMA(frag);
return NULL;
}
frag->segment.seg_addr.pval = iov.iov_base;
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
frag->segment.seg_len = max_data;
*size = max_data;
}
@ -255,6 +255,7 @@ mca_btl_self_prepare_dst( struct mca_btl_base_module_t* btl,
{
mca_btl_self_frag_t* frag;
size_t max_data = *size;
void *ptr;
int rc;
MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag, rc);
@ -263,7 +264,9 @@ mca_btl_self_prepare_dst( struct mca_btl_base_module_t* btl,
}
/* setup descriptor to point directly to user buffer */
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) );
opal_convertor_get_current_pointer( convertor, &ptr );
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) ptr;
frag->segment.seg_len = reserve + max_data;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
@ -322,9 +325,9 @@ int mca_btl_self_rdma( struct mca_btl_base_module_t* btl,
mca_btl_base_segment_t* dst = des->des_dst;
size_t src_cnt = des->des_src_cnt;
size_t dst_cnt = des->des_dst_cnt;
unsigned char* src_addr = (unsigned char*)src->seg_addr.pval;
unsigned char* src_addr = (unsigned char *)(uintptr_t) src->seg_addr.lval;
size_t src_len = src->seg_len;
unsigned char* dst_addr = (unsigned char*)dst->seg_addr.pval;
unsigned char* dst_addr = (unsigned char *)(uintptr_t) dst->seg_addr.lval;
size_t dst_len = dst->seg_len;
int btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

Просмотреть файл

@ -720,7 +720,7 @@ struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_src(
MCA_BTL_SM_FRAG_RETURN(frag);
return NULL;
}
frag->segment.base.seg_addr.pval = iov.iov_base;
frag->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
frag->segment.base.seg_len = max_data;
#if OMPI_BTL_SM_HAVE_KNEM
@ -930,6 +930,7 @@ struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_dst(
uint32_t flags)
{
int rc;
void *ptr;
mca_btl_sm_frag_t* frag;
MCA_BTL_SM_FRAG_ALLOC_USER(frag, rc);
@ -938,7 +939,8 @@ struct mca_btl_base_descriptor_t* mca_btl_sm_prepare_dst(
}
frag->segment.base.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.base.seg_addr.pval) );
opal_convertor_get_current_pointer( convertor, &ptr );
frag->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) ptr;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
@ -971,7 +973,7 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
/* Fill in the ioctl data fields. There's no async completion, so
we don't need to worry about getting a slot, etc. */
recv_iovec.base = (uintptr_t) dst->base.seg_addr.pval;
recv_iovec.base = (uintptr_t) dst->base.seg_addr.lval;
recv_iovec.len = dst->base.seg_len;
icopy.local_iovec_array = (uintptr_t)&recv_iovec;
icopy.local_iovec_nr = 1;
@ -1008,17 +1010,17 @@ int mca_btl_sm_get_sync(struct mca_btl_base_module_t* btl,
pid_t remote_pid;
int val;
remote_address = (char *) src->base.seg_addr.pval;
remote_address = (char *)(uintptr_t) src->base.seg_addr.lval;
remote_length = src->base.seg_len;
local_address = (char *) dst->base.seg_addr.pval;
local_address = (char *)(uintptr_t) dst->base.seg_addr.lval;
local_length = dst->base.seg_len;
remote_pid = src->key;
remote.iov_base = src->base.seg_addr.pval;
remote.iov_len = src->base.seg_len;
local.iov_base = dst->base.seg_addr.pval;
local.iov_len = dst->base.seg_len;
remote.iov_base = remote_address;
remote.iov_len = remote_length;
local.iov_base = local_address;
local.iov_len = local_length;
val = process_vm_readv(remote_pid, &local, 1, &remote, 1, 0);
@ -1083,7 +1085,7 @@ int mca_btl_sm_get_async(struct mca_btl_base_module_t* btl,
/* We have a slot, so fill in the data fields. Bump the
first_avail and num_used counters. */
recv_iovec.base = (uintptr_t) dst->base.seg_addr.pval;
recv_iovec.base = (uintptr_t) dst->base.seg_addr.lval;
recv_iovec.len = dst->base.seg_len;
icopy.local_iovec_array = (uintptr_t)&recv_iovec;
icopy.local_iovec_nr = 1;

Просмотреть файл

@ -746,7 +746,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
MCA_BTL_SMCUDA_FRAG_RETURN(frag);
return NULL;
}
frag->segment.base.seg_addr.pval = iov.iov_base;
frag->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
frag->segment.base.seg_len = max_data;
memcpy(frag->segment.key, ((mca_mpool_common_cuda_reg_t *)registration)->memHandle,
sizeof(((mca_mpool_common_cuda_reg_t *)registration)->memHandle) +
@ -938,6 +938,7 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
uint32_t flags)
{
int rc;
void *ptr;
mca_btl_smcuda_frag_t* frag;
/* Only support GPU buffers */
@ -951,7 +952,8 @@ struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_dst(
}
frag->segment.base.seg_len = *size;
opal_convertor_get_current_pointer( convertor, (void**)&(frag->segment.base.seg_addr.pval) );
opal_convertor_get_current_pointer( convertor, &ptr );
frag->segment.base.seg_addr.lval = (uint64_t)(uintptr_t) ptr;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
@ -1021,9 +1023,10 @@ int mca_btl_smcuda_get_cuda(struct mca_btl_base_module_t* btl,
* rget_reg, not reg_ptr, as we do not cache the event. */
mca_common_wait_stream_synchronize(&rget_reg);
rc = mca_common_cuda_memcpy(dst_seg->base.seg_addr.pval, remote_memory_address,
dst_seg->base.seg_len, "mca_btl_smcuda_get",
(mca_btl_base_descriptor_t *)frag, &done);
rc = mca_common_cuda_memcpy((void *)(uintptr_t) dst_seg->base.seg_addr.lval,
remote_memory_address, dst_seg->base.seg_len,
"mca_btl_smcuda_get", (mca_btl_base_descriptor_t *)frag,
&done);
if (OMPI_SUCCESS != rc) {
/* Out of resources can be handled by upper layers. */
if (OMPI_ERR_OUT_OF_RESOURCE != rc) {

Просмотреть файл

@ -300,7 +300,7 @@ mca_btl_ugni_prepare_dst (mca_btl_base_module_t *btl,
frag->segments[0].memory_handle = ((mca_btl_ugni_reg_t *)registration)->memory_hdl;
frag->segments[0].base.seg_len = *size;
frag->segments[0].base.seg_addr.pval = data_ptr;
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->base.des_dst = &frag->segments->base;
frag->base.des_dst_cnt = 1;

Просмотреть файл

@ -216,7 +216,7 @@ mca_btl_ugni_prepare_src_rdma (struct mca_btl_base_module_t *btl,
frag->segments[0].extra_byte_count = 0;
}
frag->segments[0].base.seg_addr.pval = data_ptr;
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segments[0].base.seg_len = *size;
frag->base.des_src = &frag->segments->base;

Просмотреть файл

@ -33,15 +33,16 @@ int mca_btl_vader_get (struct mca_btl_base_module_t *btl,
mca_mpool_base_registration_t *reg;
void *rem_ptr;
reg = vader_get_registation (endpoint->peer_smp_rank, src->seg_addr.pval,
reg = vader_get_registation (endpoint->peer_smp_rank,
(void *)(uintptr_t) src->seg_addr.lval,
src->seg_len, 0);
if (OPAL_UNLIKELY(NULL == reg)) {
return OMPI_ERROR;
}
rem_ptr = vader_reg_to_ptr (reg, src->seg_addr.pval);
rem_ptr = vader_reg_to_ptr (reg, (void *)(uintptr_t) src->seg_addr.lval);
vader_memmove (dst->seg_addr.pval, rem_ptr, size);
vader_memmove ((void *)(uintptr_t) dst->seg_addr.lval, rem_ptr, size);
vader_return_registration (reg, endpoint->peer_smp_rank);

Просмотреть файл

@ -570,7 +570,7 @@ struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t
opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
frag->segment.seg_addr.pval = data_ptr;
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segment.seg_len = *size;
frag->base.des_dst = &frag->segment;
@ -665,7 +665,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
return NULL;
}
frag->segment.seg_addr.pval = data_ptr;
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segment.seg_len = reserve + *size;
}

Просмотреть файл

@ -33,15 +33,16 @@ int mca_btl_vader_put (struct mca_btl_base_module_t *btl,
mca_mpool_base_registration_t *reg;
void *rem_ptr;
reg = vader_get_registation (endpoint->peer_smp_rank, dst->seg_addr.pval,
reg = vader_get_registation (endpoint->peer_smp_rank,
(void *)(uintptr_t) dst->seg_addr.lval,
dst->seg_len, 0);
if (OPAL_UNLIKELY(NULL == reg)) {
return OMPI_ERROR;
}
rem_ptr = vader_reg_to_ptr (reg, dst->seg_addr.pval);
rem_ptr = vader_reg_to_ptr (reg, (void *)(uintptr_t) dst->seg_addr.lval);
vader_memmove (rem_ptr, src->seg_addr.pval, size);
vader_memmove (rem_ptr, (void *)(uintptr_t) src->seg_addr.lval, size);
vader_return_registration (reg, endpoint->peer_smp_rank);