btl/openib: fix message coalescing
There was a bug in the openib btl handling this valid sequence of calls: desc = btl_alloc (); btl_free (desc); When triggered the bug would cause either fragment loss or undefined behavior (SEGV, etc). The problem occured because btl_alloc contained the logic to modify the pending fragment (length, etc) and these changes were not corrected if the fragment was freed instead of sent. To fix this issue I 1) moved some of the coalescing logic to the btl_send function, and 2) retry the coalesced fragment on btl_free if it was never sent. This appears to completely address the issue.
Этот коммит содержится в:
родитель
9aaac11648
Коммит
9bae131589
@ -1133,7 +1133,8 @@ ib_frag_alloc(mca_btl_openib_module_t *btl, size_t size, uint8_t order,
|
||||
|
||||
/* check if pending fragment has enough space for coalescing */
|
||||
static mca_btl_openib_send_frag_t *check_coalescing(opal_list_t *frag_list,
|
||||
opal_mutex_t *lock, mca_btl_base_endpoint_t *ep, size_t size)
|
||||
opal_mutex_t *lock, struct mca_btl_base_endpoint_t *ep, size_t size,
|
||||
mca_btl_openib_coalesced_frag_t **cfrag)
|
||||
{
|
||||
mca_btl_openib_send_frag_t *frag = NULL;
|
||||
|
||||
@ -1158,11 +1159,21 @@ static mca_btl_openib_send_frag_t *check_coalescing(opal_list_t *frag_list,
|
||||
|
||||
qp = to_base_frag(frag)->base.order;
|
||||
|
||||
if(total_length <= mca_btl_openib_component.qp_infos[qp].size)
|
||||
if(total_length <= mca_btl_openib_component.qp_infos[qp].size) {
|
||||
/* make sure we can allocate a coalescing frag before returning success */
|
||||
*cfrag = alloc_coalesced_frag();
|
||||
if (OPAL_LIKELY(NULL != cfrag)) {
|
||||
(*cfrag)->send_frag = frag;
|
||||
(*cfrag)->sent = false;
|
||||
|
||||
opal_list_remove_first(frag_list);
|
||||
else
|
||||
} else {
|
||||
frag = NULL;
|
||||
}
|
||||
} else {
|
||||
frag = NULL;
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(lock);
|
||||
|
||||
return frag;
|
||||
@ -1188,7 +1199,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
|
||||
mca_btl_openib_module_t *obtl = (mca_btl_openib_module_t*)btl;
|
||||
int qp = frag_size_to_order(obtl, size);
|
||||
mca_btl_openib_send_frag_t *sfrag = NULL;
|
||||
mca_btl_openib_coalesced_frag_t *cfrag;
|
||||
mca_btl_openib_coalesced_frag_t *cfrag = NULL;
|
||||
|
||||
assert(qp != MCA_BTL_NO_ORDER);
|
||||
|
||||
@ -1197,26 +1208,25 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
|
||||
int prio = !(flags & MCA_BTL_DES_FLAGS_PRIORITY);
|
||||
|
||||
sfrag = check_coalescing(&ep->qps[qp].no_wqe_pending_frags[prio],
|
||||
&ep->endpoint_lock, ep, size);
|
||||
&ep->endpoint_lock, ep, size, &cfrag);
|
||||
|
||||
if (NULL == sfrag) {
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
sfrag = check_coalescing(&ep->qps[qp].no_credits_pending_frags[prio],
|
||||
&ep->endpoint_lock, ep, size);
|
||||
&ep->endpoint_lock, ep, size, &cfrag);
|
||||
} else {
|
||||
sfrag = check_coalescing(
|
||||
&obtl->qps[qp].u.srq_qp.pending_frags[prio],
|
||||
&obtl->ib_lock, ep, size);
|
||||
&obtl->ib_lock, ep, size, &cfrag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(NULL == sfrag)
|
||||
if (NULL == sfrag) {
|
||||
return ib_frag_alloc((mca_btl_openib_module_t*)btl, size, order, flags);
|
||||
}
|
||||
|
||||
/* begin coalescing message */
|
||||
cfrag = alloc_coalesced_frag();
|
||||
cfrag->send_frag = sfrag;
|
||||
|
||||
/* fix up new coalescing header if this is the first coalesced frag */
|
||||
if(sfrag->hdr != sfrag->chdr) {
|
||||
@ -1250,10 +1260,9 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
|
||||
to_base_frag(cfrag)->segment.base.seg_addr.pval = cfrag->hdr + 1;
|
||||
to_base_frag(cfrag)->segment.base.seg_len = size;
|
||||
|
||||
/* save coalesced fragment on a main fragment; we will need it after send
|
||||
* completion to free it and to call upper layer callback */
|
||||
opal_list_append(&sfrag->coalesced_frags, (opal_list_item_t*)cfrag);
|
||||
sfrag->coalesced_length += (size+sizeof(mca_btl_openib_header_coalesced_t));
|
||||
/* NTH: there is no reason to append the coalesced fragment here. No more
|
||||
* fragments will be added until either send or free has been called on
|
||||
* the coalesced frag. */
|
||||
|
||||
to_base_frag(cfrag)->base.des_flags = flags;
|
||||
|
||||
@ -1306,6 +1315,15 @@ int mca_btl_openib_free(
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_COALESCED && !to_coalesced_frag(des)->sent) {
|
||||
mca_btl_openib_send_frag_t *sfrag = to_coalesced_frag(des)->send_frag;
|
||||
|
||||
/* the coalesced fragment would have sent the original fragment but that
|
||||
* will not happen so send the fragment now */
|
||||
mca_btl_openib_endpoint_send(to_com_frag(sfrag)->endpoint, sfrag);
|
||||
}
|
||||
|
||||
MCA_BTL_IB_FRAG_RETURN(des);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
@ -1888,11 +1906,19 @@ int mca_btl_openib_send(
|
||||
openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_COALESCED);
|
||||
|
||||
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_COALESCED) {
|
||||
frag = to_coalesced_frag(des)->send_frag;
|
||||
|
||||
/* save coalesced fragment on a main fragment; we will need it after send
|
||||
* completion to free it and to call upper layer callback */
|
||||
opal_list_append(&frag->coalesced_frags, (opal_list_item_t*) des);
|
||||
frag->coalesced_length += to_coalesced_frag(des)->hdr->alloc_size +
|
||||
sizeof(mca_btl_openib_header_coalesced_t);
|
||||
|
||||
to_coalesced_frag(des)->sent = true;
|
||||
to_coalesced_frag(des)->hdr->tag = tag;
|
||||
to_coalesced_frag(des)->hdr->size = des->des_local->seg_len;
|
||||
if(ep->nbo)
|
||||
BTL_OPENIB_HEADER_COALESCED_HTON(*to_coalesced_frag(des)->hdr);
|
||||
frag = to_coalesced_frag(des)->send_frag;
|
||||
} else {
|
||||
frag = to_send_frag(des);
|
||||
to_com_frag(des)->endpoint = ep;
|
||||
|
@ -371,6 +371,7 @@ typedef struct mca_btl_openib_coalesced_frag_t {
|
||||
mca_btl_openib_frag_t super;
|
||||
mca_btl_openib_send_frag_t *send_frag;
|
||||
mca_btl_openib_header_coalesced_t *hdr;
|
||||
bool sent;
|
||||
} mca_btl_openib_coalesced_frag_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_openib_coalesced_frag_t);
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user