1
1

btl/vader: fix bug in single copy code that could cause ob1 sends to not get marked complete.

cmr:v1.7

This commit was SVN r27671.
Этот коммит содержится в:
Nathan Hjelm 2012-12-13 23:18:53 +00:00
родитель f779b1ded9
Коммит ba5b2b0540
6 изменённых файлов: 46 добавлений и 46 удалений

Просмотреть файл

@ -294,15 +294,14 @@ static int mca_btl_vader_component_progress (void)
mca_btl_active_message_callback_t *reg; mca_btl_active_message_callback_t *reg;
mca_btl_vader_frag_t frag; mca_btl_vader_frag_t frag;
mca_btl_vader_hdr_t *hdr; mca_btl_vader_hdr_t *hdr;
mca_btl_base_segment_t segments[2];
mca_mpool_base_registration_t *xpmem_reg = NULL; mca_mpool_base_registration_t *xpmem_reg = NULL;
/* check for messages in fast boxes */
mca_btl_vader_check_fboxes ();
/* check active sends for completion */ /* check active sends for completion */
mca_btl_vader_progress_sends (); mca_btl_vader_progress_sends ();
/* check for messages in fast boxes */
mca_btl_vader_check_fboxes ();
/* poll the fifo once */ /* poll the fifo once */
hdr = vader_fifo_read (fifo); hdr = vader_fifo_read (fifo);
if (NULL == hdr) { if (NULL == hdr) {
@ -310,19 +309,16 @@ static int mca_btl_vader_component_progress (void)
} }
reg = mca_btl_base_active_message_trigger + hdr->tag; reg = mca_btl_base_active_message_trigger + hdr->tag;
frag.base.des_dst = segments; frag.base.des_dst = frag.segments;
frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
segments[0].seg_addr.pval = (void *) (hdr + 1); frag.segments[0].seg_len = hdr->len;
segments[0].seg_len = hdr->len;
if (OPAL_UNLIKELY(hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY)) { if (OPAL_UNLIKELY(hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY)) {
struct iovec *rem_mem = (struct iovec *) ((uintptr_t)segments[0].seg_addr.pval + hdr->len); xpmem_reg = vader_get_registation (hdr->my_smp_rank, hdr->sc_iov.iov_base,
hdr->sc_iov.iov_len, 0);
xpmem_reg = vader_get_registation (hdr->my_smp_rank, rem_mem->iov_base, frag.segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, hdr->sc_iov.iov_base);
rem_mem->iov_len, 0); frag.segments[1].seg_len = hdr->sc_iov.iov_len;
segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, rem_mem->iov_base);
segments[1].seg_len = rem_mem->iov_len;
/* recv upcall */ /* recv upcall */
frag.base.des_dst_cnt = 2; frag.base.des_dst_cnt = 2;

Просмотреть файл

@ -106,10 +106,10 @@ static inline void mca_btl_vader_check_fboxes (void)
reg = mca_btl_base_active_message_trigger + tag; reg = mca_btl_base_active_message_trigger + tag;
frag.segment.seg_addr.pval = fbox + 2; frag.segments[0].seg_addr.pval = fbox + 2;
frag.segment.seg_len = size; frag.segments[0].seg_len = size;
frag.base.des_dst = &frag.segment; frag.base.des_dst = frag.segments;
frag.base.des_dst_cnt = 1; frag.base.des_dst_cnt = 1;
reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata); reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata);

Просмотреть файл

@ -38,6 +38,7 @@ struct mca_btl_vader_hdr_t {
int flags; /* vader send flags */ int flags; /* vader send flags */
int my_smp_rank; /* smp rank of owning process */ int my_smp_rank; /* smp rank of owning process */
size_t len; /* length of data following this header */ size_t len; /* length of data following this header */
struct iovec sc_iov; /* io vector containing pointer to single-copy data */
}; };
typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t; typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
@ -46,7 +47,7 @@ typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
*/ */
struct mca_btl_vader_frag_t { struct mca_btl_vader_frag_t {
mca_btl_base_descriptor_t base; mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment; mca_btl_base_segment_t segments[2];
struct mca_btl_base_endpoint_t *endpoint; struct mca_btl_base_endpoint_t *endpoint;
mca_btl_vader_hdr_t *hdr; /* in the shared memory region */ mca_btl_vader_hdr_t *hdr; /* in the shared memory region */
ompi_free_list_t *my_list; ompi_free_list_t *my_list;
@ -65,7 +66,7 @@ static inline int mca_btl_vader_frag_alloc (mca_btl_vader_frag_t **frag, ompi_fr
if (OPAL_LIKELY(NULL != item)) { if (OPAL_LIKELY(NULL != item)) {
(*frag)->hdr->complete = false; (*frag)->hdr->complete = false;
(*frag)->hdr->flags = MCA_BTL_VADER_FLAG_INLINE; (*frag)->hdr->flags = MCA_BTL_VADER_FLAG_INLINE;
(*frag)->segment.seg_addr.pval = (char *)((*frag)->hdr + 1); (*frag)->segments[0].seg_addr.pval = (char *)((*frag)->hdr + 1);
(*frag)->my_list = list; (*frag)->my_list = list;
} }

Просмотреть файл

@ -526,14 +526,14 @@ mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl
} }
if (OPAL_LIKELY(frag != NULL)) { if (OPAL_LIKELY(frag != NULL)) {
frag->segment.seg_len = size; frag->segments[0].seg_len = size;
frag->endpoint = endpoint; frag->endpoint = endpoint;
frag->base.des_flags = flags; frag->base.des_flags = flags;
frag->base.order = order; frag->base.order = order;
frag->base.des_src = &frag->segment; frag->base.des_src = frag->segments;
frag->base.des_src_cnt = 1; frag->base.des_src_cnt = 1;
frag->base.des_dst = &frag->segment; frag->base.des_dst = frag->segments;
frag->base.des_src_cnt = 1; frag->base.des_src_cnt = 1;
} }
@ -570,10 +570,10 @@ struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t
opal_convertor_get_current_pointer (convertor, (void **) &data_ptr); opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr; frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segment.seg_len = *size; frag->segments[0].seg_len = *size;
frag->base.des_dst = &frag->segment; frag->base.des_dst = frag->segments;
frag->base.des_dst_cnt = 1; frag->base.des_dst_cnt = 1;
frag->base.order = order; frag->base.order = order;
frag->base.des_flags = flags; frag->base.des_flags = flags;
@ -614,7 +614,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
iov.iov_len = *size; iov.iov_len = *size;
iov.iov_base = iov.iov_base =
(IOVBASE_TYPE *)(((uintptr_t)(frag->segment.seg_addr.pval)) + (IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
reserve); reserve);
rc = opal_convertor_pack (convertor, &iov, &iov_count, size); rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
@ -623,7 +623,8 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
return NULL; return NULL;
} }
frag->segment.seg_len = reserve + *size; frag->segments[0].seg_len = reserve + *size;
frag->base.des_src_cnt = 1;
} else { } else {
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag); (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
if (OPAL_UNLIKELY(NULL == frag)) { if (OPAL_UNLIKELY(NULL == frag)) {
@ -632,15 +633,16 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
if ((*size + reserve) > (size_t) mca_btl_vader_max_inline_send) { if ((*size + reserve) > (size_t) mca_btl_vader_max_inline_send) {
/* single copy send */ /* single copy send */
/* pack the iovec after the reserved memory */
lcl_mem = (struct iovec *) ((uintptr_t)frag->segment.seg_addr.pval + reserve);
frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY; frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;
lcl_mem->iov_base = data_ptr; /* set up single copy io vector */
lcl_mem->iov_len = *size; frag->hdr->sc_iov.iov_base = data_ptr;
frag->hdr->sc_iov.iov_len = *size;
frag->segment.seg_len = reserve; frag->segments[0].seg_len = reserve;
frag->segments[1].seg_len = *size;
frag->segments[1].seg_addr.pval = data_ptr;
frag->base.des_src_cnt = 2;
} else { } else {
/* inline send */ /* inline send */
@ -649,13 +651,14 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
if (fbox_ptr) { if (fbox_ptr) {
frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX; frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
frag->segment.seg_addr.pval = fbox_ptr; frag->segments[0].seg_addr.pval = fbox_ptr;
} }
/* NTH: the convertor adds some latency so we bypass it here */ /* NTH: the convertor adds some latency so we bypass it here */
vader_memmove ((void *)((uintptr_t)frag->segment.seg_addr.pval + reserve), vader_memmove ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve),
data_ptr, *size); data_ptr, *size);
frag->segment.seg_len = reserve + *size; frag->segments[0].seg_len = reserve + *size;
frag->base.des_src_cnt = 1;
} }
} }
} else { } else {
@ -665,12 +668,12 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
return NULL; return NULL;
} }
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr; frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segment.seg_len = reserve + *size; frag->segments[0].seg_len = reserve + *size;
frag->base.des_src_cnt = 1;
} }
frag->base.des_src = &frag->segment; frag->base.des_src = frag->segments;
frag->base.des_src_cnt = 1;
frag->base.order = order; frag->base.order = order;
frag->base.des_flags = flags; frag->base.des_flags = flags;

Просмотреть файл

@ -42,15 +42,15 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor; mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
if (frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX) { if (frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX) {
mca_btl_vader_fbox_send (frag->segment.seg_addr.pval, tag, frag->segment.seg_len); mca_btl_vader_fbox_send (frag->segments[0].seg_addr.pval, tag, frag->segments[0].seg_len);
mca_btl_vader_frag_complete (frag); mca_btl_vader_frag_complete (frag);
return 1; return 1;
} }
/* available header space */ /* header (+ optional inline data) */
frag->hdr->len = frag->segment.seg_len; frag->hdr->len = frag->segments[0].seg_len;
/* type of message, pt-2-pt, one-sided, etc */ /* type of message, pt-2-pt, one-sided, etc */
frag->hdr->tag = tag; frag->hdr->tag = tag;

Просмотреть файл

@ -78,7 +78,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
frag->hdr->tag = tag; frag->hdr->tag = tag;
/* write the match header (with MPI comm/tag/etc. info) */ /* write the match header (with MPI comm/tag/etc. info) */
memcpy (frag->segment.seg_addr.pval, header, header_size); memcpy (frag->segments[0].seg_addr.pval, header, header_size);
/* write the message data if there is any */ /* write the message data if there is any */
/* /*
@ -88,7 +88,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
complete if we return success */ complete if we return success */
if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) { if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) {
/* pack the data into the supplied buffer */ /* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segment.seg_addr.pval + header_size); iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
iov.iov_len = max_data = payload_size; iov.iov_len = max_data = payload_size;
(void) opal_convertor_pack (convertor, &iov, &iov_count, &max_data); (void) opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
@ -97,7 +97,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
} else if (payload_size) { } else if (payload_size) {
/* bypassing the convertor may speed things up a little */ /* bypassing the convertor may speed things up a little */
opal_convertor_get_current_pointer (convertor, &data_ptr); opal_convertor_get_current_pointer (convertor, &data_ptr);
memcpy ((void *)((uintptr_t)frag->segment.seg_addr.pval + header_size), data_ptr, payload_size); memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size), data_ptr, payload_size);
} }
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag); opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);