btl/vader: fix a bug in the single-copy code that could prevent ob1 sends from being marked complete.
cmr:v1.7 This commit was SVN r27671.
Этот коммит содержится в:
родитель
f779b1ded9
Коммит
ba5b2b0540
@ -294,15 +294,14 @@ static int mca_btl_vader_component_progress (void)
|
||||
mca_btl_active_message_callback_t *reg;
|
||||
mca_btl_vader_frag_t frag;
|
||||
mca_btl_vader_hdr_t *hdr;
|
||||
mca_btl_base_segment_t segments[2];
|
||||
mca_mpool_base_registration_t *xpmem_reg = NULL;
|
||||
|
||||
/* check for messages in fast boxes */
|
||||
mca_btl_vader_check_fboxes ();
|
||||
|
||||
/* check active sends for completion */
|
||||
mca_btl_vader_progress_sends ();
|
||||
|
||||
/* check for messages in fast boxes */
|
||||
mca_btl_vader_check_fboxes ();
|
||||
|
||||
/* poll the fifo once */
|
||||
hdr = vader_fifo_read (fifo);
|
||||
if (NULL == hdr) {
|
||||
@ -310,19 +309,16 @@ static int mca_btl_vader_component_progress (void)
|
||||
}
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||
frag.base.des_dst = segments;
|
||||
|
||||
segments[0].seg_addr.pval = (void *) (hdr + 1);
|
||||
segments[0].seg_len = hdr->len;
|
||||
frag.base.des_dst = frag.segments;
|
||||
frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
|
||||
frag.segments[0].seg_len = hdr->len;
|
||||
|
||||
if (OPAL_UNLIKELY(hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY)) {
|
||||
struct iovec *rem_mem = (struct iovec *) ((uintptr_t)segments[0].seg_addr.pval + hdr->len);
|
||||
xpmem_reg = vader_get_registation (hdr->my_smp_rank, hdr->sc_iov.iov_base,
|
||||
hdr->sc_iov.iov_len, 0);
|
||||
|
||||
xpmem_reg = vader_get_registation (hdr->my_smp_rank, rem_mem->iov_base,
|
||||
rem_mem->iov_len, 0);
|
||||
|
||||
segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, rem_mem->iov_base);
|
||||
segments[1].seg_len = rem_mem->iov_len;
|
||||
frag.segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, hdr->sc_iov.iov_base);
|
||||
frag.segments[1].seg_len = hdr->sc_iov.iov_len;
|
||||
|
||||
/* recv upcall */
|
||||
frag.base.des_dst_cnt = 2;
|
||||
|
@ -106,10 +106,10 @@ static inline void mca_btl_vader_check_fboxes (void)
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + tag;
|
||||
|
||||
frag.segment.seg_addr.pval = fbox + 2;
|
||||
frag.segment.seg_len = size;
|
||||
frag.segments[0].seg_addr.pval = fbox + 2;
|
||||
frag.segments[0].seg_len = size;
|
||||
|
||||
frag.base.des_dst = &frag.segment;
|
||||
frag.base.des_dst = frag.segments;
|
||||
frag.base.des_dst_cnt = 1;
|
||||
reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata);
|
||||
|
||||
|
@ -38,6 +38,7 @@ struct mca_btl_vader_hdr_t {
|
||||
int flags; /* vader send flags */
|
||||
int my_smp_rank; /* smp rank of owning process */
|
||||
size_t len; /* length of data following this header */
|
||||
struct iovec sc_iov; /* io vector containing pointer to single-copy data */
|
||||
};
|
||||
typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
|
||||
|
||||
@ -46,7 +47,7 @@ typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
|
||||
*/
|
||||
struct mca_btl_vader_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_base_segment_t segment;
|
||||
mca_btl_base_segment_t segments[2];
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_vader_hdr_t *hdr; /* in the shared memory region */
|
||||
ompi_free_list_t *my_list;
|
||||
@ -65,7 +66,7 @@ static inline int mca_btl_vader_frag_alloc (mca_btl_vader_frag_t **frag, ompi_fr
|
||||
if (OPAL_LIKELY(NULL != item)) {
|
||||
(*frag)->hdr->complete = false;
|
||||
(*frag)->hdr->flags = MCA_BTL_VADER_FLAG_INLINE;
|
||||
(*frag)->segment.seg_addr.pval = (char *)((*frag)->hdr + 1);
|
||||
(*frag)->segments[0].seg_addr.pval = (char *)((*frag)->hdr + 1);
|
||||
(*frag)->my_list = list;
|
||||
}
|
||||
|
||||
|
@ -526,14 +526,14 @@ mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl
|
||||
}
|
||||
|
||||
if (OPAL_LIKELY(frag != NULL)) {
|
||||
frag->segment.seg_len = size;
|
||||
frag->segments[0].seg_len = size;
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
frag->base.des_flags = flags;
|
||||
frag->base.order = order;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src = frag->segments;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst = frag->segments;
|
||||
frag->base.des_src_cnt = 1;
|
||||
}
|
||||
|
||||
@ -570,10 +570,10 @@ struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
|
||||
|
||||
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
frag->segment.seg_len = *size;
|
||||
frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
frag->segments[0].seg_len = *size;
|
||||
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst = frag->segments;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
@ -614,7 +614,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
|
||||
iov.iov_len = *size;
|
||||
iov.iov_base =
|
||||
(IOVBASE_TYPE *)(((uintptr_t)(frag->segment.seg_addr.pval)) +
|
||||
(IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
|
||||
reserve);
|
||||
|
||||
rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
|
||||
@ -623,7 +623,8 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segment.seg_len = reserve + *size;
|
||||
frag->segments[0].seg_len = reserve + *size;
|
||||
frag->base.des_src_cnt = 1;
|
||||
} else {
|
||||
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
@ -632,15 +633,16 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
|
||||
if ((*size + reserve) > (size_t) mca_btl_vader_max_inline_send) {
|
||||
/* single copy send */
|
||||
/* pack the iovec after the reserved memory */
|
||||
lcl_mem = (struct iovec *) ((uintptr_t)frag->segment.seg_addr.pval + reserve);
|
||||
|
||||
frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;
|
||||
|
||||
lcl_mem->iov_base = data_ptr;
|
||||
lcl_mem->iov_len = *size;
|
||||
/* set up single copy io vector */
|
||||
frag->hdr->sc_iov.iov_base = data_ptr;
|
||||
frag->hdr->sc_iov.iov_len = *size;
|
||||
|
||||
frag->segment.seg_len = reserve;
|
||||
frag->segments[0].seg_len = reserve;
|
||||
frag->segments[1].seg_len = *size;
|
||||
frag->segments[1].seg_addr.pval = data_ptr;
|
||||
frag->base.des_src_cnt = 2;
|
||||
} else {
|
||||
/* inline send */
|
||||
|
||||
@ -649,13 +651,14 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
|
||||
if (fbox_ptr) {
|
||||
frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
|
||||
frag->segment.seg_addr.pval = fbox_ptr;
|
||||
frag->segments[0].seg_addr.pval = fbox_ptr;
|
||||
}
|
||||
|
||||
/* NTH: the convertor adds some latency so we bypass it here */
|
||||
vader_memmove ((void *)((uintptr_t)frag->segment.seg_addr.pval + reserve),
|
||||
vader_memmove ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve),
|
||||
data_ptr, *size);
|
||||
frag->segment.seg_len = reserve + *size;
|
||||
frag->segments[0].seg_len = reserve + *size;
|
||||
frag->base.des_src_cnt = 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -665,12 +668,12 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
frag->segment.seg_len = reserve + *size;
|
||||
frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
frag->segments[0].seg_len = reserve + *size;
|
||||
frag->base.des_src_cnt = 1;
|
||||
}
|
||||
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_src = frag->segments;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
|
||||
|
@ -42,15 +42,15 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
|
||||
|
||||
if (frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX) {
|
||||
mca_btl_vader_fbox_send (frag->segment.seg_addr.pval, tag, frag->segment.seg_len);
|
||||
mca_btl_vader_fbox_send (frag->segments[0].seg_addr.pval, tag, frag->segments[0].seg_len);
|
||||
|
||||
mca_btl_vader_frag_complete (frag);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* available header space */
|
||||
frag->hdr->len = frag->segment.seg_len;
|
||||
/* header (+ optional inline data) */
|
||||
frag->hdr->len = frag->segments[0].seg_len;
|
||||
/* type of message, pt-2-pt, one-sided, etc */
|
||||
frag->hdr->tag = tag;
|
||||
|
||||
|
@ -78,7 +78,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
frag->hdr->tag = tag;
|
||||
|
||||
/* write the match header (with MPI comm/tag/etc. info) */
|
||||
memcpy (frag->segment.seg_addr.pval, header, header_size);
|
||||
memcpy (frag->segments[0].seg_addr.pval, header, header_size);
|
||||
|
||||
/* write the message data if there is any */
|
||||
/*
|
||||
@ -88,7 +88,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
complete if we return success */
|
||||
if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) {
|
||||
/* pack the data into the supplied buffer */
|
||||
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segment.seg_addr.pval + header_size);
|
||||
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
|
||||
iov.iov_len = max_data = payload_size;
|
||||
|
||||
(void) opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
|
||||
@ -97,7 +97,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
} else if (payload_size) {
|
||||
/* bypassing the convertor may speed things up a little */
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
memcpy ((void *)((uintptr_t)frag->segment.seg_addr.pval + header_size), data_ptr, payload_size);
|
||||
memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size), data_ptr, payload_size);
|
||||
}
|
||||
|
||||
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user