usnic: check packet/segment lengths
Lower layer (hardware or software) bugs can result in a mismatch between our BTL-layer payload size and the actual packet length. We now check that in order to catch these cases, which otherwise can result in MPI-layer message corruption. Reviewed-by: Jeff Squyres <jsquyres@cisco.com> cmr=v1.7.5:ticket=trac:4253 This commit was SVN r30843. The following Trac tickets were found above: Ticket 4253 --> https://svn.open-mpi.org/trac/ompi/ticket/4253
Этот коммит содержится в:
родитель
3b5b87c325
Коммит
62dc42f628
@ -965,7 +965,8 @@ static int usnic_component_progress(void)
|
||||
if (OPAL_LIKELY(wc.opcode == IBV_WC_RECV &&
|
||||
wc.status == IBV_WC_SUCCESS)) {
|
||||
rseg = (ompi_btl_usnic_recv_segment_t*)(intptr_t)wc.wr_id;
|
||||
ompi_btl_usnic_recv_fast(module, rseg, channel);
|
||||
ompi_btl_usnic_recv_fast(module, rseg, channel,
|
||||
wc.byte_len);
|
||||
fastpath_ok = false; /* prevent starvation */
|
||||
return 1;
|
||||
} else {
|
||||
@ -1048,7 +1049,7 @@ static int usnic_handle_completion(
|
||||
/**** Receive completions ****/
|
||||
case OMPI_BTL_USNIC_SEG_RECV:
|
||||
assert(IBV_WC_RECV == cwc->opcode);
|
||||
ompi_btl_usnic_recv(module, rseg, channel);
|
||||
ompi_btl_usnic_recv(module, rseg, channel, cwc->byte_len);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -600,6 +600,36 @@ ompi_btl_usnic_ack_segment_return(
|
||||
OMPI_FREE_LIST_RETURN_MT(&(module->ack_segs), &(ack->ss_base.us_list));
|
||||
}
|
||||
|
||||
/* returns the expected L2 packet size in bytes for the given FRAG recv
|
||||
* segment, based on the payload_len */
|
||||
static inline uint32_t
|
||||
ompi_btl_usnic_frag_seg_proto_size(ompi_btl_usnic_recv_segment_t *rseg)
|
||||
{
|
||||
ompi_btl_usnic_segment_t *bseg = &rseg->rs_base;
|
||||
|
||||
MSGDEBUG1_OUT("us_type=%d\n", bseg->us_type);
|
||||
assert(OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG == bseg->us_btl_header->payload_type);
|
||||
|
||||
return (OMPI_BTL_USNIC_PROTO_HDR_SZ +
|
||||
sizeof(*bseg->us_btl_header) +
|
||||
bseg->us_btl_header->payload_len);
|
||||
}
|
||||
|
||||
/* returns the expected L2 packet size in bytes for the given CHUNK recv
|
||||
* segment, based on the payload_len */
|
||||
static inline uint32_t
|
||||
ompi_btl_usnic_chunk_seg_proto_size(ompi_btl_usnic_recv_segment_t *rseg)
|
||||
{
|
||||
ompi_btl_usnic_segment_t *bseg = &rseg->rs_base;
|
||||
|
||||
assert(OMPI_BTL_USNIC_PAYLOAD_TYPE_CHUNK ==
|
||||
bseg->us_btl_chunk_header->ch_hdr.payload_type);
|
||||
|
||||
return (OMPI_BTL_USNIC_PROTO_HDR_SZ +
|
||||
sizeof(*bseg->us_btl_chunk_header) +
|
||||
bseg->us_btl_chunk_header->ch_hdr.payload_len);
|
||||
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -50,7 +50,8 @@
|
||||
*/
|
||||
void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
ompi_btl_usnic_recv_segment_t *seg,
|
||||
ompi_btl_usnic_channel_t *channel)
|
||||
ompi_btl_usnic_channel_t *channel,
|
||||
uint32_t l2_bytes_rcvd)
|
||||
{
|
||||
ompi_btl_usnic_segment_t *bseg;
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
@ -132,6 +133,14 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (OPAL_UNLIKELY(ompi_btl_usnic_frag_seg_proto_size(seg) !=
|
||||
l2_bytes_rcvd)) {
|
||||
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
|
||||
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
|
||||
l2_bytes_rcvd, ompi_btl_usnic_frag_seg_proto_size(seg)));
|
||||
abort();
|
||||
}
|
||||
|
||||
/* If this is not a PUT, pass this segment up to the PML.
|
||||
* Be sure to get the payload length from the BTL header because
|
||||
* the L2 layer may artificially inflate (or otherwise change)
|
||||
@ -172,6 +181,14 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
int frag_index;
|
||||
ompi_btl_usnic_rx_frag_info_t *fip;
|
||||
|
||||
if (OPAL_UNLIKELY(ompi_btl_usnic_chunk_seg_proto_size(seg) !=
|
||||
l2_bytes_rcvd)) {
|
||||
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
|
||||
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
|
||||
l2_bytes_rcvd, ompi_btl_usnic_chunk_seg_proto_size(seg)));
|
||||
abort();
|
||||
}
|
||||
|
||||
/* Is incoming sequence # ok? */
|
||||
if (OPAL_UNLIKELY(ompi_btl_usnic_check_rx_seq(endpoint, seg,
|
||||
&window_index) != 0)) {
|
||||
|
@ -13,13 +13,15 @@
|
||||
#include <infiniband/verbs.h>
|
||||
|
||||
#include "btl_usnic.h"
|
||||
#include "btl_usnic_util.h"
|
||||
#include "btl_usnic_frag.h"
|
||||
#include "btl_usnic_proc.h"
|
||||
|
||||
|
||||
void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
||||
ompi_btl_usnic_recv_segment_t *rseg,
|
||||
ompi_btl_usnic_channel_t *channel);
|
||||
ompi_btl_usnic_channel_t *channel,
|
||||
uint32_t l2_bytes_rcvd);
|
||||
|
||||
/*
|
||||
* Given an incoming segment, lookup the endpoint that sent it
|
||||
@ -241,7 +243,8 @@ dup_needs_ack:
|
||||
static inline void
|
||||
ompi_btl_usnic_recv_fast(ompi_btl_usnic_module_t *module,
|
||||
ompi_btl_usnic_recv_segment_t *seg,
|
||||
ompi_btl_usnic_channel_t *channel)
|
||||
ompi_btl_usnic_channel_t *channel,
|
||||
uint32_t l2_bytes_rcvd)
|
||||
{
|
||||
ompi_btl_usnic_segment_t *bseg;
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
@ -293,7 +296,7 @@ drop:
|
||||
channel->chan_deferred_recv = seg;
|
||||
|
||||
} else {
|
||||
ompi_btl_usnic_recv_call(module, seg, channel);
|
||||
ompi_btl_usnic_recv_call(module, seg, channel, l2_bytes_rcvd);
|
||||
}
|
||||
}
|
||||
|
||||
@ -350,7 +353,8 @@ repost:
|
||||
static inline void
|
||||
ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
|
||||
ompi_btl_usnic_recv_segment_t *seg,
|
||||
ompi_btl_usnic_channel_t *channel)
|
||||
ompi_btl_usnic_channel_t *channel,
|
||||
uint32_t l2_bytes_rcvd)
|
||||
{
|
||||
ompi_btl_usnic_segment_t *bseg;
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
@ -372,6 +376,14 @@ ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
|
||||
(void*) endpoint, bseg->us_btl_header->pkt_seq,
|
||||
bseg->us_btl_header->payload_len);
|
||||
|
||||
if (OPAL_UNLIKELY(ompi_btl_usnic_frag_seg_proto_size(seg) !=
|
||||
l2_bytes_rcvd)) {
|
||||
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
|
||||
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
|
||||
l2_bytes_rcvd, ompi_btl_usnic_frag_seg_proto_size(seg)));
|
||||
abort();
|
||||
}
|
||||
|
||||
/* do the receive bookkeeping */
|
||||
rc = ompi_btl_usnic_recv_frag_bookkeeping(module, seg, channel);
|
||||
if (rc != 0) {
|
||||
@ -390,7 +402,7 @@ ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
|
||||
&seg->rs_desc, reg->cbdata);
|
||||
|
||||
} else {
|
||||
ompi_btl_usnic_recv_call(module, seg, channel);
|
||||
ompi_btl_usnic_recv_call(module, seg, channel, l2_bytes_rcvd);
|
||||
}
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user