1
1

btl/vader: fix bug in single copy code that could cause ob1 sends to not get marked complete.

cmr:v1.7

This commit was SVN r27671.
Этот коммит содержится в:
Nathan Hjelm 2012-12-13 23:18:53 +00:00
родитель f779b1ded9
Коммит ba5b2b0540
6 изменённых файлов: 46 добавлений и 46 удалений

Просмотреть файл

@ -294,15 +294,14 @@ static int mca_btl_vader_component_progress (void)
mca_btl_active_message_callback_t *reg;
mca_btl_vader_frag_t frag;
mca_btl_vader_hdr_t *hdr;
mca_btl_base_segment_t segments[2];
mca_mpool_base_registration_t *xpmem_reg = NULL;
/* check for messages in fast boxes */
mca_btl_vader_check_fboxes ();
/* check active sends for completion */
mca_btl_vader_progress_sends ();
/* check for messages in fast boxes */
mca_btl_vader_check_fboxes ();
/* poll the fifo once */
hdr = vader_fifo_read (fifo);
if (NULL == hdr) {
@ -310,19 +309,16 @@ static int mca_btl_vader_component_progress (void)
}
reg = mca_btl_base_active_message_trigger + hdr->tag;
frag.base.des_dst = segments;
segments[0].seg_addr.pval = (void *) (hdr + 1);
segments[0].seg_len = hdr->len;
frag.base.des_dst = frag.segments;
frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
frag.segments[0].seg_len = hdr->len;
if (OPAL_UNLIKELY(hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY)) {
struct iovec *rem_mem = (struct iovec *) ((uintptr_t)segments[0].seg_addr.pval + hdr->len);
xpmem_reg = vader_get_registation (hdr->my_smp_rank, hdr->sc_iov.iov_base,
hdr->sc_iov.iov_len, 0);
xpmem_reg = vader_get_registation (hdr->my_smp_rank, rem_mem->iov_base,
rem_mem->iov_len, 0);
segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, rem_mem->iov_base);
segments[1].seg_len = rem_mem->iov_len;
frag.segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, hdr->sc_iov.iov_base);
frag.segments[1].seg_len = hdr->sc_iov.iov_len;
/* recv upcall */
frag.base.des_dst_cnt = 2;

Просмотреть файл

@ -106,10 +106,10 @@ static inline void mca_btl_vader_check_fboxes (void)
reg = mca_btl_base_active_message_trigger + tag;
frag.segment.seg_addr.pval = fbox + 2;
frag.segment.seg_len = size;
frag.segments[0].seg_addr.pval = fbox + 2;
frag.segments[0].seg_len = size;
frag.base.des_dst = &frag.segment;
frag.base.des_dst = frag.segments;
frag.base.des_dst_cnt = 1;
reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata);

Просмотреть файл

@ -38,6 +38,7 @@ struct mca_btl_vader_hdr_t {
int flags; /* vader send flags */
int my_smp_rank; /* smp rank of owning process */
size_t len; /* length of data following this header */
struct iovec sc_iov; /* io vector containing pointer to single-copy data */
};
typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
@ -46,7 +47,7 @@ typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
*/
struct mca_btl_vader_frag_t {
mca_btl_base_descriptor_t base;
mca_btl_base_segment_t segment;
mca_btl_base_segment_t segments[2];
struct mca_btl_base_endpoint_t *endpoint;
mca_btl_vader_hdr_t *hdr; /* in the shared memory region */
ompi_free_list_t *my_list;
@ -65,7 +66,7 @@ static inline int mca_btl_vader_frag_alloc (mca_btl_vader_frag_t **frag, ompi_fr
if (OPAL_LIKELY(NULL != item)) {
(*frag)->hdr->complete = false;
(*frag)->hdr->flags = MCA_BTL_VADER_FLAG_INLINE;
(*frag)->segment.seg_addr.pval = (char *)((*frag)->hdr + 1);
(*frag)->segments[0].seg_addr.pval = (char *)((*frag)->hdr + 1);
(*frag)->my_list = list;
}

Просмотреть файл

@ -526,14 +526,14 @@ mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl
}
if (OPAL_LIKELY(frag != NULL)) {
frag->segment.seg_len = size;
frag->segments[0].seg_len = size;
frag->endpoint = endpoint;
frag->base.des_flags = flags;
frag->base.order = order;
frag->base.des_src = &frag->segment;
frag->base.des_src = frag->segments;
frag->base.des_src_cnt = 1;
frag->base.des_dst = &frag->segment;
frag->base.des_dst = frag->segments;
frag->base.des_src_cnt = 1;
}
@ -570,10 +570,10 @@ struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t
opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segment.seg_len = *size;
frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segments[0].seg_len = *size;
frag->base.des_dst = &frag->segment;
frag->base.des_dst = frag->segments;
frag->base.des_dst_cnt = 1;
frag->base.order = order;
frag->base.des_flags = flags;
@ -614,7 +614,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
iov.iov_len = *size;
iov.iov_base =
(IOVBASE_TYPE *)(((uintptr_t)(frag->segment.seg_addr.pval)) +
(IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
reserve);
rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
@ -623,7 +623,8 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
return NULL;
}
frag->segment.seg_len = reserve + *size;
frag->segments[0].seg_len = reserve + *size;
frag->base.des_src_cnt = 1;
} else {
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
if (OPAL_UNLIKELY(NULL == frag)) {
@ -632,15 +633,16 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
if ((*size + reserve) > (size_t) mca_btl_vader_max_inline_send) {
/* single copy send */
/* pack the iovec after the reserved memory */
lcl_mem = (struct iovec *) ((uintptr_t)frag->segment.seg_addr.pval + reserve);
frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;
lcl_mem->iov_base = data_ptr;
lcl_mem->iov_len = *size;
/* set up single copy io vector */
frag->hdr->sc_iov.iov_base = data_ptr;
frag->hdr->sc_iov.iov_len = *size;
frag->segment.seg_len = reserve;
frag->segments[0].seg_len = reserve;
frag->segments[1].seg_len = *size;
frag->segments[1].seg_addr.pval = data_ptr;
frag->base.des_src_cnt = 2;
} else {
/* inline send */
@ -649,13 +651,14 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
if (fbox_ptr) {
frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
frag->segment.seg_addr.pval = fbox_ptr;
frag->segments[0].seg_addr.pval = fbox_ptr;
}
/* NTH: the convertor adds some latency so we bypass it here */
vader_memmove ((void *)((uintptr_t)frag->segment.seg_addr.pval + reserve),
vader_memmove ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve),
data_ptr, *size);
frag->segment.seg_len = reserve + *size;
frag->segments[0].seg_len = reserve + *size;
frag->base.des_src_cnt = 1;
}
}
} else {
@ -665,12 +668,12 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
return NULL;
}
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segment.seg_len = reserve + *size;
frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segments[0].seg_len = reserve + *size;
frag->base.des_src_cnt = 1;
}
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_src = frag->segments;
frag->base.order = order;
frag->base.des_flags = flags;

Просмотреть файл

@ -42,15 +42,15 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
if (frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX) {
mca_btl_vader_fbox_send (frag->segment.seg_addr.pval, tag, frag->segment.seg_len);
mca_btl_vader_fbox_send (frag->segments[0].seg_addr.pval, tag, frag->segments[0].seg_len);
mca_btl_vader_frag_complete (frag);
return 1;
}
/* available header space */
frag->hdr->len = frag->segment.seg_len;
/* header (+ optional inline data) */
frag->hdr->len = frag->segments[0].seg_len;
/* type of message, pt-2-pt, one-sided, etc */
frag->hdr->tag = tag;

Просмотреть файл

@ -78,7 +78,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
frag->hdr->tag = tag;
/* write the match header (with MPI comm/tag/etc. info) */
memcpy (frag->segment.seg_addr.pval, header, header_size);
memcpy (frag->segments[0].seg_addr.pval, header, header_size);
/* write the message data if there is any */
/*
@ -88,7 +88,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
complete if we return success */
if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) {
/* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segment.seg_addr.pval + header_size);
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
iov.iov_len = max_data = payload_size;
(void) opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
@ -97,7 +97,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
} else if (payload_size) {
/* bypassing the convertor may speed things up a little */
opal_convertor_get_current_pointer (convertor, &data_ptr);
memcpy ((void *)((uintptr_t)frag->segment.seg_addr.pval + header_size), data_ptr, payload_size);
memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size), data_ptr, payload_size);
}
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);