1
1

usnic: check packet/segment lengths

Lower layer (hardware or software) bugs can result in a mismatch between
our BTL-layer payload size and the actual packet length.  We now check
that in order to catch these cases, which otherwise can result in
MPI-layer message corruption.

Reviewed-by: Jeff Squyres <jsquyres@cisco.com>

cmr=v1.7.5:ticket=trac:4253

This commit was SVN r30843.

The following Trac tickets were found above:
  Ticket 4253 --> https://svn.open-mpi.org/trac/ompi/ticket/4253
Этот коммит содержится в:
Dave Goodell 2014-02-26 07:47:19 +00:00
родитель 3b5b87c325
Коммит 62dc42f628
4 изменённых файлов: 72 добавлений и 12 удалений

Просмотреть файл

@ -965,7 +965,8 @@ static int usnic_component_progress(void)
if (OPAL_LIKELY(wc.opcode == IBV_WC_RECV && if (OPAL_LIKELY(wc.opcode == IBV_WC_RECV &&
wc.status == IBV_WC_SUCCESS)) { wc.status == IBV_WC_SUCCESS)) {
rseg = (ompi_btl_usnic_recv_segment_t*)(intptr_t)wc.wr_id; rseg = (ompi_btl_usnic_recv_segment_t*)(intptr_t)wc.wr_id;
ompi_btl_usnic_recv_fast(module, rseg, channel); ompi_btl_usnic_recv_fast(module, rseg, channel,
wc.byte_len);
fastpath_ok = false; /* prevent starvation */ fastpath_ok = false; /* prevent starvation */
return 1; return 1;
} else { } else {
@ -1048,7 +1049,7 @@ static int usnic_handle_completion(
/**** Receive completions ****/ /**** Receive completions ****/
case OMPI_BTL_USNIC_SEG_RECV: case OMPI_BTL_USNIC_SEG_RECV:
assert(IBV_WC_RECV == cwc->opcode); assert(IBV_WC_RECV == cwc->opcode);
ompi_btl_usnic_recv(module, rseg, channel); ompi_btl_usnic_recv(module, rseg, channel, cwc->byte_len);
break; break;
default: default:

Просмотреть файл

@ -600,6 +600,36 @@ ompi_btl_usnic_ack_segment_return(
OMPI_FREE_LIST_RETURN_MT(&(module->ack_segs), &(ack->ss_base.us_list)); OMPI_FREE_LIST_RETURN_MT(&(module->ack_segs), &(ack->ss_base.us_list));
} }
/*
 * Compute how many bytes the L2 packet carrying a FRAG receive segment is
 * expected to occupy, judging by the payload_len recorded in the segment's
 * BTL header.
 *
 * The result is the sum of:
 *   - OMPI_BTL_USNIC_PROTO_HDR_SZ,
 *   - the size of the BTL header structure,
 *   - the payload_len field of that BTL header.
 *
 * rseg: receive segment whose BTL header payload_type must be FRAG
 *       (checked with assert()).
 */
static inline uint32_t
ompi_btl_usnic_frag_seg_proto_size(ompi_btl_usnic_recv_segment_t *rseg)
{
    ompi_btl_usnic_segment_t *base_seg = &(rseg->rs_base);
    uint32_t wire_len;

    MSGDEBUG1_OUT("us_type=%d\n", base_seg->us_type);

    /* this helper is only meaningful for FRAG payloads */
    assert(base_seg->us_btl_header->payload_type ==
           OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG);

    wire_len = OMPI_BTL_USNIC_PROTO_HDR_SZ;
    wire_len += sizeof(*base_seg->us_btl_header);
    wire_len += base_seg->us_btl_header->payload_len;

    return wire_len;
}
/*
 * Compute how many bytes the L2 packet carrying a CHUNK receive segment is
 * expected to occupy, judging by the payload_len recorded in the segment's
 * chunk header.
 *
 * The result is the sum of:
 *   - OMPI_BTL_USNIC_PROTO_HDR_SZ,
 *   - the size of the BTL chunk header structure,
 *   - the payload_len field of the chunk header's embedded ch_hdr.
 *
 * rseg: receive segment whose chunk header payload_type must be CHUNK
 *       (checked with assert()).
 */
static inline uint32_t
ompi_btl_usnic_chunk_seg_proto_size(ompi_btl_usnic_recv_segment_t *rseg)
{
    ompi_btl_usnic_segment_t *base_seg = &(rseg->rs_base);
    uint32_t wire_len;

    /* this helper is only meaningful for CHUNK payloads */
    assert(base_seg->us_btl_chunk_header->ch_hdr.payload_type ==
           OMPI_BTL_USNIC_PAYLOAD_TYPE_CHUNK);

    wire_len = OMPI_BTL_USNIC_PROTO_HDR_SZ;
    wire_len += sizeof(*base_seg->us_btl_chunk_header);
    wire_len += base_seg->us_btl_chunk_header->ch_hdr.payload_len;

    return wire_len;
}
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -50,7 +50,8 @@
*/ */
void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module, void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
ompi_btl_usnic_recv_segment_t *seg, ompi_btl_usnic_recv_segment_t *seg,
ompi_btl_usnic_channel_t *channel) ompi_btl_usnic_channel_t *channel,
uint32_t l2_bytes_rcvd)
{ {
ompi_btl_usnic_segment_t *bseg; ompi_btl_usnic_segment_t *bseg;
mca_btl_active_message_callback_t* reg; mca_btl_active_message_callback_t* reg;
@ -132,6 +133,14 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
#endif #endif
#endif #endif
if (OPAL_UNLIKELY(ompi_btl_usnic_frag_seg_proto_size(seg) !=
l2_bytes_rcvd)) {
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
l2_bytes_rcvd, ompi_btl_usnic_frag_seg_proto_size(seg)));
abort();
}
/* If this is not a PUT, pass this segment up to the PML. /* If this is not a PUT, pass this segment up to the PML.
* Be sure to get the payload length from the BTL header because * Be sure to get the payload length from the BTL header because
* the L2 layer may artificially inflate (or otherwise change) * the L2 layer may artificially inflate (or otherwise change)
@ -172,6 +181,14 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
int frag_index; int frag_index;
ompi_btl_usnic_rx_frag_info_t *fip; ompi_btl_usnic_rx_frag_info_t *fip;
if (OPAL_UNLIKELY(ompi_btl_usnic_chunk_seg_proto_size(seg) !=
l2_bytes_rcvd)) {
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
l2_bytes_rcvd, ompi_btl_usnic_chunk_seg_proto_size(seg)));
abort();
}
/* Is incoming sequence # ok? */ /* Is incoming sequence # ok? */
if (OPAL_UNLIKELY(ompi_btl_usnic_check_rx_seq(endpoint, seg, if (OPAL_UNLIKELY(ompi_btl_usnic_check_rx_seq(endpoint, seg,
&window_index) != 0)) { &window_index) != 0)) {

Просмотреть файл

@ -13,13 +13,15 @@
#include <infiniband/verbs.h> #include <infiniband/verbs.h>
#include "btl_usnic.h" #include "btl_usnic.h"
#include "btl_usnic_util.h"
#include "btl_usnic_frag.h" #include "btl_usnic_frag.h"
#include "btl_usnic_proc.h" #include "btl_usnic_proc.h"
void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module, void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
ompi_btl_usnic_recv_segment_t *rseg, ompi_btl_usnic_recv_segment_t *rseg,
ompi_btl_usnic_channel_t *channel); ompi_btl_usnic_channel_t *channel,
uint32_t l2_bytes_rcvd);
/* /*
* Given an incoming segment, lookup the endpoint that sent it * Given an incoming segment, lookup the endpoint that sent it
@ -241,7 +243,8 @@ dup_needs_ack:
static inline void static inline void
ompi_btl_usnic_recv_fast(ompi_btl_usnic_module_t *module, ompi_btl_usnic_recv_fast(ompi_btl_usnic_module_t *module,
ompi_btl_usnic_recv_segment_t *seg, ompi_btl_usnic_recv_segment_t *seg,
ompi_btl_usnic_channel_t *channel) ompi_btl_usnic_channel_t *channel,
uint32_t l2_bytes_rcvd)
{ {
ompi_btl_usnic_segment_t *bseg; ompi_btl_usnic_segment_t *bseg;
mca_btl_active_message_callback_t* reg; mca_btl_active_message_callback_t* reg;
@ -293,7 +296,7 @@ drop:
channel->chan_deferred_recv = seg; channel->chan_deferred_recv = seg;
} else { } else {
ompi_btl_usnic_recv_call(module, seg, channel); ompi_btl_usnic_recv_call(module, seg, channel, l2_bytes_rcvd);
} }
} }
@ -350,7 +353,8 @@ repost:
static inline void static inline void
ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module, ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
ompi_btl_usnic_recv_segment_t *seg, ompi_btl_usnic_recv_segment_t *seg,
ompi_btl_usnic_channel_t *channel) ompi_btl_usnic_channel_t *channel,
uint32_t l2_bytes_rcvd)
{ {
ompi_btl_usnic_segment_t *bseg; ompi_btl_usnic_segment_t *bseg;
mca_btl_active_message_callback_t* reg; mca_btl_active_message_callback_t* reg;
@ -372,6 +376,14 @@ ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
(void*) endpoint, bseg->us_btl_header->pkt_seq, (void*) endpoint, bseg->us_btl_header->pkt_seq,
bseg->us_btl_header->payload_len); bseg->us_btl_header->payload_len);
if (OPAL_UNLIKELY(ompi_btl_usnic_frag_seg_proto_size(seg) !=
l2_bytes_rcvd)) {
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
l2_bytes_rcvd, ompi_btl_usnic_frag_seg_proto_size(seg)));
abort();
}
/* do the receive bookkeeping */ /* do the receive bookkeeping */
rc = ompi_btl_usnic_recv_frag_bookkeeping(module, seg, channel); rc = ompi_btl_usnic_recv_frag_bookkeeping(module, seg, channel);
if (rc != 0) { if (rc != 0) {
@ -390,7 +402,7 @@ ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
&seg->rs_desc, reg->cbdata); &seg->rs_desc, reg->cbdata);
} else { } else {
ompi_btl_usnic_recv_call(module, seg, channel); ompi_btl_usnic_recv_call(module, seg, channel, l2_bytes_rcvd);
} }
} }