btl/vader: fix fast box support for 32-bit architectures
On 32-bit architectures, loads and stores of fast box headers may take multiple instructions. This can cause a data race between the sender and the receiver when reading or writing the sequence number, which in turn can let the receiver process incomplete data. To fix the issue, this commit reorders the fast box header so that the sequence number and the tag share the same 32 bits, ensuring they are always loaded and stored together. Fixes #473 Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
0c553c2693
Коммит
17b80a987e
@ -11,7 +11,7 @@
|
|||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
||||||
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -48,14 +48,16 @@ typedef struct mca_btl_base_endpoint_t {
|
|||||||
/* per peer buffers */
|
/* per peer buffers */
|
||||||
struct {
|
struct {
|
||||||
unsigned char *buffer; /**< starting address of peer's fast box out */
|
unsigned char *buffer; /**< starting address of peer's fast box out */
|
||||||
unsigned int start, seq;
|
|
||||||
uint32_t *startp;
|
uint32_t *startp;
|
||||||
|
unsigned int start;
|
||||||
|
uint16_t seq;
|
||||||
} fbox_in;
|
} fbox_in;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
unsigned char *buffer; /**< starting address of peer's fast box in */
|
unsigned char *buffer; /**< starting address of peer's fast box in */
|
||||||
unsigned int start, end, seq;
|
|
||||||
uint32_t *startp; /**< pointer to location storing start offset */
|
uint32_t *startp; /**< pointer to location storing start offset */
|
||||||
|
unsigned int start, end;
|
||||||
|
uint16_t seq;
|
||||||
} fbox_out;
|
} fbox_out;
|
||||||
|
|
||||||
int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing
|
int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -18,9 +18,16 @@
|
|||||||
|
|
||||||
typedef union mca_btl_vader_fbox_hdr_t {
|
typedef union mca_btl_vader_fbox_hdr_t {
|
||||||
struct {
|
struct {
|
||||||
|
/* NTH: on 32-bit platforms loading/storing the header may take multiple
|
||||||
|
* instructions. To ensure that seq and tag are always loaded and stored
|
||||||
|
* together (neither is ever observed without the other), put them in the
|
||||||
|
* same 32 bits of the header. */
|
||||||
|
/** message tag */
|
||||||
uint16_t tag;
|
uint16_t tag;
|
||||||
uint16_t size;
|
/** sequence number */
|
||||||
uint32_t seq;
|
uint16_t seq;
|
||||||
|
/** message size */
|
||||||
|
uint32_t size;
|
||||||
} data;
|
} data;
|
||||||
uint64_t ival;
|
uint64_t ival;
|
||||||
} mca_btl_vader_fbox_hdr_t;
|
} mca_btl_vader_fbox_hdr_t;
|
||||||
@ -40,6 +47,13 @@ typedef union mca_btl_vader_fbox_hdr_t {
|
|||||||
|
|
||||||
void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, mca_btl_base_endpoint_t *ep);
|
void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, mca_btl_base_endpoint_t *ep);
|
||||||
|
|
||||||
|
/**
 * Write a complete fast box header to hdr.
 *
 * The header is first assembled in a local temporary and then copied to
 * hdr with one assignment of the union's 64-bit ival member, rather than
 * by assigning the tag, seq and size fields individually.
 *
 * @param hdr   header to overwrite
 * @param tag   message tag
 * @param seq   sequence number
 * @param size  message size
 */
static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, uint16_t tag,
|
||||||
|
uint16_t seq, uint32_t size)
|
||||||
|
{
|
||||||
|
mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}};
|
||||||
|
/* NOTE(review): a 64-bit assignment may still be split into several stores
 * on 32-bit targets -- confirm against the target ABI. */
hdr->ival = tmp.ival;
|
||||||
|
}
|
||||||
|
|
||||||
/* attempt to reserve a contiguous segment from the remote ep */
|
/* attempt to reserve a contiguous segment from the remote ep */
|
||||||
static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t *ep, size_t size)
|
static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t *ep, size_t size)
|
||||||
{
|
{
|
||||||
@ -88,12 +102,10 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
|
|||||||
/* if this is the end of the buffer and the fragment doesn't fit then mark the remaining buffer space to
|
/* if this is the end of the buffer and the fragment doesn't fit then mark the remaining buffer space to
|
||||||
* be skipped and check if the fragment can be written at the beginning of the buffer. */
|
* be skipped and check if the fragment can be written at the beginning of the buffer. */
|
||||||
if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) {
|
if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) {
|
||||||
mca_btl_vader_fbox_hdr_t tmp = {.data = {.size = buffer_free - sizeof (mca_btl_vader_fbox_hdr_t),
|
|
||||||
.seq = ep->fbox_out.seq++, .tag = 0xff}};
|
|
||||||
|
|
||||||
BTL_VERBOSE(("message will not fit in remaining buffer space. skipping to beginning"));
|
BTL_VERBOSE(("message will not fit in remaining buffer space. skipping to beginning"));
|
||||||
|
|
||||||
MCA_BTL_VADER_FBOX_HDR(dst)->ival = tmp.ival;
|
mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0xff, ep->fbox_out.seq++,
|
||||||
|
buffer_free - sizeof (mca_btl_vader_fbox_hdr_t));
|
||||||
|
|
||||||
end = MCA_BTL_VADER_FBOX_ALIGNMENT;
|
end = MCA_BTL_VADER_FBOX_ALIGNMENT;
|
||||||
/* toggle the high bit */
|
/* toggle the high bit */
|
||||||
@ -114,11 +126,7 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t
|
|||||||
(unsigned int) size, end, start, end, hbs, buffer_free));
|
(unsigned int) size, end, start, end, hbs, buffer_free));
|
||||||
|
|
||||||
/* write out part of the header now. the tag will be written when the data is available */
|
/* write out part of the header now. the tag will be written when the data is available */
|
||||||
{
|
mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0, ep->fbox_out.seq++, data_size);
|
||||||
mca_btl_vader_fbox_hdr_t tmp = {.data = {.size = data_size, .tag = 0, .seq = ep->fbox_out.seq++}};
|
|
||||||
|
|
||||||
MCA_BTL_VADER_FBOX_HDR(dst)->ival = tmp.ival;
|
|
||||||
}
|
|
||||||
|
|
||||||
end += size;
|
end += size;
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user