usnic: SEGV in OSU benchmarks
Prevent the frag from being freed out from under us when the PML callback routine calls usnic_free(). We accomplish this by delaying the decrement of sf_ack_bytes_left until after the callback is performed, since sf_ack_bytes_left == 0 is the condition for freeing the frag. Fixes Cisco bug CSCuj45094. Authored-by: Reese Faucette <rfaucett@cisco.com> cmr=v1.7.3 This commit was SVN r29264.
Этот коммит содержится в:
родитель
e4e3e411fc
Коммит
a42fa78da7
@ -127,22 +127,29 @@ ompi_btl_usnic_handle_ack(
|
||||
bytes_acked = sseg->ss_base.us_btl_header->payload_len;
|
||||
frag = sseg->ss_parent_frag;
|
||||
|
||||
/* when no bytes left to ACK, fragment send is truly done */
|
||||
frag->sf_ack_bytes_left -= bytes_acked;
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, " ACKED seg %p, frag %p, ack_bytes=%"PRIu32", left=%zd\n",
|
||||
(void*)sseg, (void*)frag, bytes_acked, frag->sf_ack_bytes_left);
|
||||
(void*)sseg, (void*)frag, bytes_acked,
|
||||
frag->sf_ack_bytes_left-bytes_acked);
|
||||
#endif
|
||||
|
||||
/* If all ACKs received, and this is a put or a regular send
|
||||
* that needs a callback, perform the callback now
|
||||
*
|
||||
* NOTE on sf_ack_bytes_left - here we check for
|
||||
* sf_ack_bytes_left == bytes_acked
|
||||
* as opposed to adjusting sf_ack_bytes_left and checking for 0 because
|
||||
* if we don't, the callback function may call usnic_free() and free
|
||||
* the fragment out from under us which we do not want. If the
|
||||
* fragment really needs to be freed, we'll take care of it in a few
|
||||
* lines below.
|
||||
*/
|
||||
if (frag->sf_ack_bytes_left == 0 &&
|
||||
if (frag->sf_ack_bytes_left == bytes_acked &&
|
||||
((frag->sf_base.uf_dst_seg[0].seg_addr.pval != NULL) ||
|
||||
(frag->sf_base.uf_base.des_flags &
|
||||
MCA_BTL_DES_SEND_ALWAYS_CALLBACK))) {
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "completion callback for put frag=%p, dest=%p\n",
|
||||
opal_output(0, "send completion callback frag=%p, dest=%p\n",
|
||||
(void*)frag, frag->sf_base.uf_dst_seg[0].seg_addr.pval);
|
||||
#endif
|
||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||
@ -158,6 +165,10 @@ ompi_btl_usnic_handle_ack(
|
||||
ompi_btl_usnic_release_send_segment(module, frag, sseg);
|
||||
}
|
||||
|
||||
/* when no bytes left to ACK, fragment send is truly done */
|
||||
/* see note above on why this is done here as opposed to earlier */
|
||||
frag->sf_ack_bytes_left -= bytes_acked;
|
||||
|
||||
/* OK to return this fragment? */
|
||||
ompi_btl_usnic_send_frag_return_cond(module, frag);
|
||||
|
||||
|
@ -1397,11 +1397,11 @@ usnic_send(
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "immediate callback for frag %p\n", (void *)frag);
|
||||
#endif
|
||||
descriptor->des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||
frag->sf_endpoint, &frag->sf_base.uf_base,
|
||||
OMPI_SUCCESS);
|
||||
rc = 0;
|
||||
descriptor->des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
} else {
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "skipping callback for frag %p\n", (void *)frag);
|
||||
|
@ -230,12 +230,13 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
abort();
|
||||
}
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "Start large recv to %p, size=%d\n",
|
||||
fip->rfi_data, chunk_hdr->ch_frag_size);
|
||||
opal_output(0, "Start large recv to %p, size=%"PRIu32"\n",
|
||||
(void *)fip->rfi_data, chunk_hdr->ch_frag_size);
|
||||
#endif
|
||||
} else {
|
||||
#if MSGDEBUG1
|
||||
opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
|
||||
opal_output(0, "Start PUT to %p\n",
|
||||
(void *)chunk_hdr->ch_hdr.put_addr);
|
||||
#endif
|
||||
fip->rfi_data = chunk_hdr->ch_hdr.put_addr;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user