btl/vader: improve performance under heavy load and eliminate a racy
feature. This commit should fix a hang seen when running some of the one-sided tests. The downside of this fix is that it reduces the maximum size of the messages that use the fast boxes. I will fix this in a later commit. To improve performance under a heavy load I introduced sequencing to ensure messages are given to the PML in order. I have seen little to no impact on the message rate or latency with this change, and there is a clear improvement in the heavy message-rate case. Let's let this sit in the trunk for a couple of days to ensure that everything is working correctly. cmr=v1.8.2:reviewer=jsquyres This commit was SVN r31522.
Этот коммит содержится в:
родитель
e243805ed8
Коммит
0849d61e38
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2013 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2010-2014 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -361,17 +361,17 @@ failed:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int mca_btl_vader_poll_fifo (void)
|
||||
static inline int mca_btl_vader_poll_fifo (void)
|
||||
{
|
||||
const mca_btl_active_message_callback_t *reg;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_vader_hdr_t *hdr;
|
||||
|
||||
/* poll the fifo until it is empty or a limit has been hit (8 is arbitrary) */
|
||||
for (int fifo_count = 0 ; fifo_count < 8 ; ++fifo_count) {
|
||||
for (int fifo_count = 0 ; fifo_count < 16 ; ++fifo_count) {
|
||||
mca_btl_vader_frag_t frag = {.base = {.des_dst = frag.segments, .des_dst_cnt = 1}};
|
||||
|
||||
hdr = vader_fifo_read (mca_btl_vader_component.my_fifo);
|
||||
hdr = vader_fifo_read (mca_btl_vader_component.my_fifo, &endpoint);
|
||||
if (NULL == hdr) {
|
||||
return fifo_count;
|
||||
}
|
||||
@ -385,8 +385,6 @@ static int mca_btl_vader_poll_fifo (void)
|
||||
frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
|
||||
frag.segments[0].seg_len = hdr->len;
|
||||
|
||||
endpoint = mca_btl_vader_component.endpoints + hdr->src_smp_rank;
|
||||
|
||||
if (hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) {
|
||||
mca_mpool_base_registration_t *xpmem_reg;
|
||||
|
||||
|
@ -11,6 +11,8 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -65,6 +67,10 @@ struct mca_btl_base_endpoint_t {
|
||||
#if OMPI_BTL_VADER_HAVE_XPMEM
|
||||
struct mca_rcache_base_module_t *rcache;
|
||||
#endif
|
||||
|
||||
/* enforce ordering */
|
||||
uint16_t next_sequence;
|
||||
uint16_t expected_sequence;
|
||||
};
|
||||
|
||||
#endif /* MCA_BTL_VADER_ENDPOINT_H */
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -25,6 +25,7 @@
|
||||
/* there should be a power of two number of fast boxes to simplify the math in the
|
||||
* critical path */
|
||||
#define MCA_BTL_VADER_LAST_FBOX 63
|
||||
#define MCA_BTL_VADER_POLL_COUNT 31
|
||||
/* two bytes are reserved for tag and size (update if the header is modified) */
|
||||
#define MCA_BTL_VADER_FBOX_HDR_SIZE 4
|
||||
#define MCA_BTL_VADER_FBOX_MAX_SIZE (MCA_BTL_VADER_FBOX_SIZE - MCA_BTL_VADER_FBOX_HDR_SIZE)
|
||||
@ -34,8 +35,9 @@
|
||||
typedef struct mca_btl_vader_fbox_t {
|
||||
union {
|
||||
struct {
|
||||
uint16_t size;
|
||||
uint16_t tag;
|
||||
uint8_t size;
|
||||
uint8_t tag;
|
||||
uint16_t seqn;
|
||||
} hdr_data;
|
||||
uint32_t ival;
|
||||
} hdr;
|
||||
@ -52,19 +54,14 @@ static inline mca_btl_vader_fbox_t * restrict mca_btl_vader_reserve_fbox (struct
|
||||
const int next_fbox = ep->next_fbox_out;
|
||||
mca_btl_vader_fbox_t * restrict fbox = MCA_BTL_VADER_FBOX_OUT_PTR(ep, next_fbox);
|
||||
|
||||
opal_atomic_mb ();
|
||||
|
||||
/* todo -- need thread locks/atomics here for the multi-threaded case */
|
||||
if (OPAL_LIKELY(size <= MCA_BTL_VADER_FBOX_MAX_SIZE && 0 == fbox->hdr.ival)) {
|
||||
/* mark this fast box as in use */
|
||||
fbox->hdr.hdr_data.size = size;
|
||||
|
||||
ep->next_fbox_out = MCA_BTL_VADER_NEXT_FBOX(next_fbox);
|
||||
return fbox;
|
||||
} else if (OPAL_LIKELY(size <= (MCA_BTL_VADER_FBOX_MAX_SIZE + MCA_BTL_VADER_FBOX_SIZE) && MCA_BTL_VADER_LAST_FBOX != next_fbox &&
|
||||
0 == fbox->hdr.ival && 0 == fbox[1].hdr.ival)) {
|
||||
/* aggregate two fast boxes */
|
||||
fbox->hdr.hdr_data.size = size;
|
||||
|
||||
ep->next_fbox_out = MCA_BTL_VADER_NEXT_FBOX(next_fbox + 1);
|
||||
opal_atomic_mb ();
|
||||
return fbox;
|
||||
}
|
||||
|
||||
@ -72,12 +69,13 @@ static inline mca_btl_vader_fbox_t * restrict mca_btl_vader_reserve_fbox (struct
|
||||
}
|
||||
|
||||
static inline void mca_btl_vader_fbox_send (mca_btl_vader_fbox_t * restrict fbox, unsigned char tag,
|
||||
size_t size)
|
||||
struct mca_btl_base_endpoint_t *endpoint)
|
||||
{
|
||||
/* ensure data writes have completed before we mark the data as available */
|
||||
opal_atomic_wmb ();
|
||||
|
||||
fbox->hdr.hdr_data.seqn = endpoint->next_sequence++;
|
||||
fbox->hdr.hdr_data.tag = tag;
|
||||
opal_atomic_wmb ();
|
||||
}
|
||||
|
||||
static inline int mca_btl_vader_fbox_sendi (struct mca_btl_base_endpoint_t *endpoint, char tag,
|
||||
@ -98,7 +96,7 @@ static inline int mca_btl_vader_fbox_sendi (struct mca_btl_base_endpoint_t *endp
|
||||
}
|
||||
|
||||
/* mark the fbox as sent */
|
||||
mca_btl_vader_fbox_send (fbox, tag, header_size + payload_size);
|
||||
mca_btl_vader_fbox_send (fbox, tag, endpoint);
|
||||
|
||||
/* send complete */
|
||||
return 1;
|
||||
@ -128,8 +126,12 @@ static inline bool mca_btl_vader_check_fboxes (void)
|
||||
processed = true;
|
||||
|
||||
/* process all fast-box messages */
|
||||
while (0 != fbox->hdr.hdr_data.tag) {
|
||||
opal_atomic_rmb ();
|
||||
for (int count = 0 ; count <= MCA_BTL_VADER_POLL_COUNT && 0 != fbox->hdr.hdr_data.tag ; ++count) {
|
||||
if (OPAL_UNLIKELY(endpoint->expected_sequence != fbox->hdr.hdr_data.seqn)) {
|
||||
break;
|
||||
}
|
||||
opal_atomic_mb ();
|
||||
++endpoint->expected_sequence;
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + fbox->hdr.hdr_data.tag;
|
||||
|
||||
@ -138,8 +140,9 @@ static inline bool mca_btl_vader_check_fboxes (void)
|
||||
|
||||
reg->cbfunc(&mca_btl_vader.super, fbox->hdr.hdr_data.tag, &desc, reg->cbdata);
|
||||
|
||||
if (fbox->hdr.hdr_data.size > MCA_BTL_VADER_FBOX_MAX_SIZE) {
|
||||
if (segment.seg_len > MCA_BTL_VADER_FBOX_MAX_SIZE) {
|
||||
fbox[1].hdr.ival = 0;
|
||||
opal_atomic_mb ();
|
||||
++next_fbox;
|
||||
}
|
||||
fbox->hdr.ival = 0;
|
||||
@ -148,6 +151,8 @@ static inline bool mca_btl_vader_check_fboxes (void)
|
||||
fbox = (mca_btl_vader_fbox_t * restrict) MCA_BTL_VADER_FBOX_IN_PTR(endpoint, next_fbox);
|
||||
}
|
||||
|
||||
opal_atomic_mb ();
|
||||
|
||||
endpoint->next_fbox_in = next_fbox;
|
||||
}
|
||||
|
||||
|
@ -91,21 +91,36 @@ static inline void *relative2virtual (fifo_value_t offset)
|
||||
return (void *)(intptr_t)((offset & MCA_BTL_VADER_OFFSET_MASK) + mca_btl_vader_component.endpoints[offset >> MCA_BTL_VADER_OFFSET_BITS].segment_base);
|
||||
}
|
||||
|
||||
static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo)
|
||||
static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo, struct mca_btl_base_endpoint_t **ep)
|
||||
{
|
||||
mca_btl_vader_hdr_t *hdr;
|
||||
fifo_value_t value;
|
||||
static volatile int32_t lock = 0;
|
||||
|
||||
opal_atomic_rmb ();
|
||||
|
||||
value = vader_item_swap (&fifo->fifo_head, VADER_FIFO_FREE);
|
||||
if (VADER_FIFO_FREE == value) {
|
||||
/* fifo is empty or we lost the race with another thread */
|
||||
if (opal_atomic_swap_32 (&lock, 1)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (VADER_FIFO_FREE == fifo->fifo_head) {
|
||||
lock = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
opal_atomic_rmb ();
|
||||
|
||||
value = fifo->fifo_head;
|
||||
|
||||
*ep = &mca_btl_vader_component.endpoints[value >> MCA_BTL_VADER_OFFSET_BITS];
|
||||
hdr = (mca_btl_vader_hdr_t *) relative2virtual (value);
|
||||
|
||||
if (OPAL_UNLIKELY(!(hdr->flags & MCA_BTL_VADER_FLAG_COMPLETE) && ((*ep)->expected_sequence != hdr->seqn))) {
|
||||
lock = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fifo->fifo_head = VADER_FIFO_FREE;
|
||||
++(*ep)->expected_sequence;
|
||||
|
||||
assert (hdr->next != value);
|
||||
|
||||
if (OPAL_UNLIKELY(VADER_FIFO_FREE == hdr->next)) {
|
||||
@ -123,7 +138,7 @@ static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo)
|
||||
}
|
||||
|
||||
opal_atomic_wmb ();
|
||||
|
||||
lock = 0;
|
||||
return hdr;
|
||||
}
|
||||
|
||||
@ -159,6 +174,7 @@ static inline void vader_fifo_write (vader_fifo_t *fifo, fifo_value_t value)
|
||||
static inline void vader_fifo_write_ep (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
hdr->next = VADER_FIFO_FREE;
|
||||
hdr->seqn = ep->next_sequence++;
|
||||
vader_fifo_write (ep->fifo, virtual2relative ((char *) hdr));
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,6 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag)
|
||||
{
|
||||
frag->hdr = (mca_btl_vader_hdr_t*)frag->base.super.ptr;
|
||||
if(frag->hdr != NULL) {
|
||||
frag->hdr->src_smp_rank = MCA_BTL_VADER_LOCAL_RANK;
|
||||
frag->hdr->frag = frag;
|
||||
frag->hdr->flags = 0;
|
||||
frag->segments[0].seg_addr.pval = (char *)(frag->hdr + 1);
|
||||
|
@ -40,7 +40,7 @@ struct mca_btl_vader_hdr_t {
|
||||
struct mca_btl_vader_frag_t *frag;
|
||||
mca_btl_base_tag_t tag; /* tag associated with this fragment (used to lookup callback) */
|
||||
uint8_t flags; /* vader send flags */
|
||||
uint16_t src_smp_rank; /* smp rank of owning process */
|
||||
uint16_t seqn;
|
||||
int32_t len; /* length of data following this header */
|
||||
struct iovec sc_iov; /* io vector containing pointer to single-copy data */
|
||||
};
|
||||
|
@ -194,6 +194,8 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct ompi_
|
||||
|
||||
ep->next_fbox_out = 0;
|
||||
ep->next_fbox_in = 0;
|
||||
ep->next_sequence = 0;
|
||||
ep->expected_sequence = 0;
|
||||
|
||||
ep->fbox_in = (struct mca_btl_vader_fbox_t * restrict) (ep->segment_base + MCA_BTL_VADER_FIFO_SIZE +
|
||||
fbox_in_offset * MCA_BTL_VADER_FBOX_PEER_SIZE);
|
||||
@ -526,6 +528,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
frag->base.des_src_cnt = 2;
|
||||
} else {
|
||||
#endif
|
||||
|
||||
/* inline send */
|
||||
if (OPAL_LIKELY(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP & flags)) {
|
||||
/* try to reserve a fast box for this transfer only if the
|
||||
|
@ -42,7 +42,7 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
|
||||
|
||||
if (OPAL_LIKELY(frag->fbox)) {
|
||||
mca_btl_vader_fbox_send (frag->fbox, tag, frag->segments[0].seg_len);
|
||||
mca_btl_vader_fbox_send (frag->fbox, tag, endpoint);
|
||||
mca_btl_vader_frag_complete (frag);
|
||||
|
||||
return 1;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user