btl/vader: fix bug in single copy code that could cause ob1 sends to not get marked complete.
cmr:v1.7 This commit was SVN r27671.
Этот коммит содержится в:
родитель
f779b1ded9
Коммит
ba5b2b0540
@ -294,15 +294,14 @@ static int mca_btl_vader_component_progress (void)
|
|||||||
mca_btl_active_message_callback_t *reg;
|
mca_btl_active_message_callback_t *reg;
|
||||||
mca_btl_vader_frag_t frag;
|
mca_btl_vader_frag_t frag;
|
||||||
mca_btl_vader_hdr_t *hdr;
|
mca_btl_vader_hdr_t *hdr;
|
||||||
mca_btl_base_segment_t segments[2];
|
|
||||||
mca_mpool_base_registration_t *xpmem_reg = NULL;
|
mca_mpool_base_registration_t *xpmem_reg = NULL;
|
||||||
|
|
||||||
/* check for messages in fast boxes */
|
|
||||||
mca_btl_vader_check_fboxes ();
|
|
||||||
|
|
||||||
/* check active sends for completion */
|
/* check active sends for completion */
|
||||||
mca_btl_vader_progress_sends ();
|
mca_btl_vader_progress_sends ();
|
||||||
|
|
||||||
|
/* check for messages in fast boxes */
|
||||||
|
mca_btl_vader_check_fboxes ();
|
||||||
|
|
||||||
/* poll the fifo once */
|
/* poll the fifo once */
|
||||||
hdr = vader_fifo_read (fifo);
|
hdr = vader_fifo_read (fifo);
|
||||||
if (NULL == hdr) {
|
if (NULL == hdr) {
|
||||||
@ -310,19 +309,16 @@ static int mca_btl_vader_component_progress (void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||||
frag.base.des_dst = segments;
|
frag.base.des_dst = frag.segments;
|
||||||
|
frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
|
||||||
segments[0].seg_addr.pval = (void *) (hdr + 1);
|
frag.segments[0].seg_len = hdr->len;
|
||||||
segments[0].seg_len = hdr->len;
|
|
||||||
|
|
||||||
if (OPAL_UNLIKELY(hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY)) {
|
if (OPAL_UNLIKELY(hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY)) {
|
||||||
struct iovec *rem_mem = (struct iovec *) ((uintptr_t)segments[0].seg_addr.pval + hdr->len);
|
xpmem_reg = vader_get_registation (hdr->my_smp_rank, hdr->sc_iov.iov_base,
|
||||||
|
hdr->sc_iov.iov_len, 0);
|
||||||
|
|
||||||
xpmem_reg = vader_get_registation (hdr->my_smp_rank, rem_mem->iov_base,
|
frag.segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, hdr->sc_iov.iov_base);
|
||||||
rem_mem->iov_len, 0);
|
frag.segments[1].seg_len = hdr->sc_iov.iov_len;
|
||||||
|
|
||||||
segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, rem_mem->iov_base);
|
|
||||||
segments[1].seg_len = rem_mem->iov_len;
|
|
||||||
|
|
||||||
/* recv upcall */
|
/* recv upcall */
|
||||||
frag.base.des_dst_cnt = 2;
|
frag.base.des_dst_cnt = 2;
|
||||||
|
@ -106,10 +106,10 @@ static inline void mca_btl_vader_check_fboxes (void)
|
|||||||
|
|
||||||
reg = mca_btl_base_active_message_trigger + tag;
|
reg = mca_btl_base_active_message_trigger + tag;
|
||||||
|
|
||||||
frag.segment.seg_addr.pval = fbox + 2;
|
frag.segments[0].seg_addr.pval = fbox + 2;
|
||||||
frag.segment.seg_len = size;
|
frag.segments[0].seg_len = size;
|
||||||
|
|
||||||
frag.base.des_dst = &frag.segment;
|
frag.base.des_dst = frag.segments;
|
||||||
frag.base.des_dst_cnt = 1;
|
frag.base.des_dst_cnt = 1;
|
||||||
reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata);
|
reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata);
|
||||||
|
|
||||||
|
@ -38,6 +38,7 @@ struct mca_btl_vader_hdr_t {
|
|||||||
int flags; /* vader send flags */
|
int flags; /* vader send flags */
|
||||||
int my_smp_rank; /* smp rank of owning process */
|
int my_smp_rank; /* smp rank of owning process */
|
||||||
size_t len; /* length of data following this header */
|
size_t len; /* length of data following this header */
|
||||||
|
struct iovec sc_iov; /* io vector containing pointer to single-copy data */
|
||||||
};
|
};
|
||||||
typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
|
typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
|
||||||
|
|
||||||
@ -46,7 +47,7 @@ typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
|
|||||||
*/
|
*/
|
||||||
struct mca_btl_vader_frag_t {
|
struct mca_btl_vader_frag_t {
|
||||||
mca_btl_base_descriptor_t base;
|
mca_btl_base_descriptor_t base;
|
||||||
mca_btl_base_segment_t segment;
|
mca_btl_base_segment_t segments[2];
|
||||||
struct mca_btl_base_endpoint_t *endpoint;
|
struct mca_btl_base_endpoint_t *endpoint;
|
||||||
mca_btl_vader_hdr_t *hdr; /* in the shared memory region */
|
mca_btl_vader_hdr_t *hdr; /* in the shared memory region */
|
||||||
ompi_free_list_t *my_list;
|
ompi_free_list_t *my_list;
|
||||||
@ -65,7 +66,7 @@ static inline int mca_btl_vader_frag_alloc (mca_btl_vader_frag_t **frag, ompi_fr
|
|||||||
if (OPAL_LIKELY(NULL != item)) {
|
if (OPAL_LIKELY(NULL != item)) {
|
||||||
(*frag)->hdr->complete = false;
|
(*frag)->hdr->complete = false;
|
||||||
(*frag)->hdr->flags = MCA_BTL_VADER_FLAG_INLINE;
|
(*frag)->hdr->flags = MCA_BTL_VADER_FLAG_INLINE;
|
||||||
(*frag)->segment.seg_addr.pval = (char *)((*frag)->hdr + 1);
|
(*frag)->segments[0].seg_addr.pval = (char *)((*frag)->hdr + 1);
|
||||||
(*frag)->my_list = list;
|
(*frag)->my_list = list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -526,14 +526,14 @@ mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (OPAL_LIKELY(frag != NULL)) {
|
if (OPAL_LIKELY(frag != NULL)) {
|
||||||
frag->segment.seg_len = size;
|
frag->segments[0].seg_len = size;
|
||||||
frag->endpoint = endpoint;
|
frag->endpoint = endpoint;
|
||||||
|
|
||||||
frag->base.des_flags = flags;
|
frag->base.des_flags = flags;
|
||||||
frag->base.order = order;
|
frag->base.order = order;
|
||||||
frag->base.des_src = &frag->segment;
|
frag->base.des_src = frag->segments;
|
||||||
frag->base.des_src_cnt = 1;
|
frag->base.des_src_cnt = 1;
|
||||||
frag->base.des_dst = &frag->segment;
|
frag->base.des_dst = frag->segments;
|
||||||
frag->base.des_src_cnt = 1;
|
frag->base.des_src_cnt = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -570,10 +570,10 @@ struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t
|
|||||||
|
|
||||||
opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
|
opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
|
||||||
|
|
||||||
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||||
frag->segment.seg_len = *size;
|
frag->segments[0].seg_len = *size;
|
||||||
|
|
||||||
frag->base.des_dst = &frag->segment;
|
frag->base.des_dst = frag->segments;
|
||||||
frag->base.des_dst_cnt = 1;
|
frag->base.des_dst_cnt = 1;
|
||||||
frag->base.order = order;
|
frag->base.order = order;
|
||||||
frag->base.des_flags = flags;
|
frag->base.des_flags = flags;
|
||||||
@ -614,7 +614,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
|||||||
|
|
||||||
iov.iov_len = *size;
|
iov.iov_len = *size;
|
||||||
iov.iov_base =
|
iov.iov_base =
|
||||||
(IOVBASE_TYPE *)(((uintptr_t)(frag->segment.seg_addr.pval)) +
|
(IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
|
||||||
reserve);
|
reserve);
|
||||||
|
|
||||||
rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
|
rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
|
||||||
@ -623,7 +623,8 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
frag->segment.seg_len = reserve + *size;
|
frag->segments[0].seg_len = reserve + *size;
|
||||||
|
frag->base.des_src_cnt = 1;
|
||||||
} else {
|
} else {
|
||||||
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
|
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
|
||||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||||
@ -632,15 +633,16 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
|||||||
|
|
||||||
if ((*size + reserve) > (size_t) mca_btl_vader_max_inline_send) {
|
if ((*size + reserve) > (size_t) mca_btl_vader_max_inline_send) {
|
||||||
/* single copy send */
|
/* single copy send */
|
||||||
/* pack the iovec after the reserved memory */
|
|
||||||
lcl_mem = (struct iovec *) ((uintptr_t)frag->segment.seg_addr.pval + reserve);
|
|
||||||
|
|
||||||
frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;
|
frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;
|
||||||
|
|
||||||
lcl_mem->iov_base = data_ptr;
|
/* set up single copy io vector */
|
||||||
lcl_mem->iov_len = *size;
|
frag->hdr->sc_iov.iov_base = data_ptr;
|
||||||
|
frag->hdr->sc_iov.iov_len = *size;
|
||||||
|
|
||||||
frag->segment.seg_len = reserve;
|
frag->segments[0].seg_len = reserve;
|
||||||
|
frag->segments[1].seg_len = *size;
|
||||||
|
frag->segments[1].seg_addr.pval = data_ptr;
|
||||||
|
frag->base.des_src_cnt = 2;
|
||||||
} else {
|
} else {
|
||||||
/* inline send */
|
/* inline send */
|
||||||
|
|
||||||
@ -649,13 +651,14 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
|||||||
|
|
||||||
if (fbox_ptr) {
|
if (fbox_ptr) {
|
||||||
frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
|
frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
|
||||||
frag->segment.seg_addr.pval = fbox_ptr;
|
frag->segments[0].seg_addr.pval = fbox_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* NTH: the convertor adds some latency so we bypass it here */
|
/* NTH: the convertor adds some latency so we bypass it here */
|
||||||
vader_memmove ((void *)((uintptr_t)frag->segment.seg_addr.pval + reserve),
|
vader_memmove ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve),
|
||||||
data_ptr, *size);
|
data_ptr, *size);
|
||||||
frag->segment.seg_len = reserve + *size;
|
frag->segments[0].seg_len = reserve + *size;
|
||||||
|
frag->base.des_src_cnt = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -665,12 +668,12 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||||
frag->segment.seg_len = reserve + *size;
|
frag->segments[0].seg_len = reserve + *size;
|
||||||
|
frag->base.des_src_cnt = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
frag->base.des_src = &frag->segment;
|
frag->base.des_src = frag->segments;
|
||||||
frag->base.des_src_cnt = 1;
|
|
||||||
frag->base.order = order;
|
frag->base.order = order;
|
||||||
frag->base.des_flags = flags;
|
frag->base.des_flags = flags;
|
||||||
|
|
||||||
|
@ -42,15 +42,15 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
|
|||||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
|
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
|
||||||
|
|
||||||
if (frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX) {
|
if (frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX) {
|
||||||
mca_btl_vader_fbox_send (frag->segment.seg_addr.pval, tag, frag->segment.seg_len);
|
mca_btl_vader_fbox_send (frag->segments[0].seg_addr.pval, tag, frag->segments[0].seg_len);
|
||||||
|
|
||||||
mca_btl_vader_frag_complete (frag);
|
mca_btl_vader_frag_complete (frag);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* available header space */
|
/* header (+ optional inline data) */
|
||||||
frag->hdr->len = frag->segment.seg_len;
|
frag->hdr->len = frag->segments[0].seg_len;
|
||||||
/* type of message, pt-2-pt, one-sided, etc */
|
/* type of message, pt-2-pt, one-sided, etc */
|
||||||
frag->hdr->tag = tag;
|
frag->hdr->tag = tag;
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
|||||||
frag->hdr->tag = tag;
|
frag->hdr->tag = tag;
|
||||||
|
|
||||||
/* write the match header (with MPI comm/tag/etc. info) */
|
/* write the match header (with MPI comm/tag/etc. info) */
|
||||||
memcpy (frag->segment.seg_addr.pval, header, header_size);
|
memcpy (frag->segments[0].seg_addr.pval, header, header_size);
|
||||||
|
|
||||||
/* write the message data if there is any */
|
/* write the message data if there is any */
|
||||||
/*
|
/*
|
||||||
@ -88,7 +88,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
|||||||
complete if we return success */
|
complete if we return success */
|
||||||
if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) {
|
if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) {
|
||||||
/* pack the data into the supplied buffer */
|
/* pack the data into the supplied buffer */
|
||||||
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segment.seg_addr.pval + header_size);
|
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
|
||||||
iov.iov_len = max_data = payload_size;
|
iov.iov_len = max_data = payload_size;
|
||||||
|
|
||||||
(void) opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
|
(void) opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
|
||||||
@ -97,7 +97,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
|||||||
} else if (payload_size) {
|
} else if (payload_size) {
|
||||||
/* bypassing the convertor may speed things up a little */
|
/* bypassing the convertor may speed things up a little */
|
||||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||||
memcpy ((void *)((uintptr_t)frag->segment.seg_addr.pval + header_size), data_ptr, payload_size);
|
memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size), data_ptr, payload_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);
|
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user