btl/vader: improve small message performance
This commit improves small-message latency and bandwidth when using the vader BTL. These improvements should make performance competitive with other MPI implementations. This commit was SVN r28760.
Этот коммит содержится в:
родитель
bea54eeeb1
Коммит
b5281778b0
@ -36,7 +36,8 @@ libmca_btl_vader_la_sources = \
|
||||
btl_vader_fbox.h \
|
||||
btl_vader_get.c \
|
||||
btl_vader_put.c \
|
||||
btl_vader_xpmem.c
|
||||
btl_vader_xpmem.c \
|
||||
btl_vader_xpmem.h
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
|
@ -47,6 +47,9 @@
|
||||
|
||||
#include "opal/class/opal_free_list.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
|
||||
#include "ompi/mca/rte/rte.h"
|
||||
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
@ -92,13 +95,9 @@ struct mca_btl_vader_component_t {
|
||||
size_t segment_size; /* size of my_segment */
|
||||
size_t segment_offset; /* start of unused portion of my_segment */
|
||||
int32_t num_smp_procs; /**< current number of smp procs on this host */
|
||||
int32_t my_smp_rank; /**< My SMP process rank. Used for accessing
|
||||
* SMP specific data structures. */
|
||||
ompi_free_list_t vader_frags_eager; /**< free list of vader send frags */
|
||||
ompi_free_list_t vader_frags_user; /**< free list of vader put/get frags */
|
||||
|
||||
opal_list_t active_sends; /**< list of outstanding fragments */
|
||||
|
||||
int memcpy_limit; /** Limit where we switch from memmove to memcpy */
|
||||
int log_attach_align; /** Log of the alignment for xpmem segments */
|
||||
int max_inline_send; /** Limit for copy-in-copy-out fragments */
|
||||
@ -128,10 +127,23 @@ OMPI_MODULE_DECLSPEC extern mca_btl_vader_t mca_btl_vader;
|
||||
* virtual addresses.
|
||||
*/
|
||||
|
||||
|
||||
/* number of peers on the node (not including self) */
|
||||
#define MCA_BTL_VADER_NUM_LOCAL_PEERS ompi_process_info.num_local_peers
|
||||
|
||||
/* local rank in the group */
|
||||
#define MCA_BTL_VADER_LOCAL_RANK ompi_process_info.my_local_rank
|
||||
|
||||
|
||||
/* This only works for finding the relative address for a pointer within my_segment */
|
||||
static inline int64_t virtual2relative (char *addr)
|
||||
{
|
||||
return (int64_t)(uintptr_t) (addr - mca_btl_vader_component.my_segment) | ((int64_t)mca_btl_vader_component.my_smp_rank << 32);
|
||||
return (int64_t)(uintptr_t) (addr - mca_btl_vader_component.my_segment) | ((int64_t)MCA_BTL_VADER_LOCAL_RANK << 32);
|
||||
}
|
||||
|
||||
/* Encode a pointer as a 64-bit relative address with respect to a peer:
 * the byte offset of addr from endpoint->segment_base is OR'ed with the
 * endpoint's peer_smp_rank shifted left by 32 bits.
 * NOTE(review): presumably addr must lie inside the segment mapped at
 * endpoint->segment_base and the offset must fit in the low 32 bits --
 * confirm against callers. */
static inline int64_t virtual2relativepeer (struct mca_btl_base_endpoint_t *endpoint, char *addr)
|
||||
{
|
||||
return (int64_t)(uintptr_t) (addr - endpoint->segment_base) | ((int64_t)endpoint->peer_smp_rank << 32);
|
||||
}
|
||||
|
||||
static inline void *relative2virtual (int64_t offset)
|
||||
@ -150,15 +162,6 @@ static inline void vader_memmove (void *dst, void *src, size_t size)
|
||||
}
|
||||
}
|
||||
|
||||
/* look up the remote pointer in the peer rcache and attach if
|
||||
* necessary */
|
||||
mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
|
||||
size_t size, int flags);
|
||||
|
||||
void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint);
|
||||
|
||||
void *vader_reg_to_ptr (mca_mpool_base_registration_t *reg, void *rem_ptr);
|
||||
|
||||
/**
|
||||
* Initiate a send to the peer.
|
||||
*
|
||||
@ -216,7 +219,6 @@ mca_btl_base_descriptor_t* mca_btl_vader_alloc (struct mca_btl_base_module_t* bt
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -167,7 +167,6 @@ static int mca_btl_vader_component_open(void)
|
||||
/* initialize objects */
|
||||
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_vader_component.active_sends, opal_list_t);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -181,7 +180,6 @@ static int mca_btl_vader_component_close(void)
|
||||
{
|
||||
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager);
|
||||
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user);
|
||||
OBJ_DESTRUCT(&mca_btl_vader_component.active_sends);
|
||||
|
||||
if (NULL != mca_btl_vader_component.my_segment) {
|
||||
munmap (mca_btl_vader_component.my_segment, mca_btl_vader_component.segment_size);
|
||||
@ -214,6 +212,11 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
|
||||
|
||||
*num_btls = 0;
|
||||
|
||||
/* disable if there are no local peers */
|
||||
if (0 == MCA_BTL_VADER_NUM_LOCAL_PEERS) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* limit segment alignment to be between 4k and 16M */
|
||||
|
||||
if (mca_btl_vader_component.segment_size < 12) {
|
||||
@ -241,7 +244,7 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
|
||||
}
|
||||
|
||||
component->my_segment = mmap (NULL, mca_btl_vader_component.segment_size, PROT_READ |
|
||||
PROT_WRITE, MAP_ANON | MAP_SHARED, -1, 0);
|
||||
PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
|
||||
if ((void *)-1 == component->my_segment) {
|
||||
free (btls);
|
||||
return NULL;
|
||||
@ -249,6 +252,9 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
|
||||
|
||||
component->segment_offset = 0;
|
||||
|
||||
memset (component->my_segment + 4096, MCA_BTL_VADER_FBOX_FREE, MCA_BTL_VADER_NUM_LOCAL_PEERS *
|
||||
MCA_BTL_VADER_FBOX_PEER_SIZE);
|
||||
|
||||
/* initialize my fifo */
|
||||
rc = vader_fifo_init ((struct vader_fifo_t *) component->my_segment);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
@ -269,76 +275,63 @@ static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
|
||||
/* get pointer to the btls */
|
||||
btls[0] = (mca_btl_base_module_t *) &mca_btl_vader;
|
||||
|
||||
/* initialize some BTL data */
|
||||
/* start with no VADER procs */
|
||||
component->num_smp_procs = 0;
|
||||
component->my_smp_rank = -1; /* not defined */
|
||||
|
||||
/* set flag indicating btl not inited */
|
||||
mca_btl_vader.btl_inited = false;
|
||||
|
||||
return btls;
|
||||
}
|
||||
|
||||
/* Walk the component's active_sends list and, for every fragment whose
 * header has its complete flag set, unlink the fragment from the list and
 * pass it to mca_btl_vader_frag_complete(). Fragments that are not yet
 * complete are left on the list. */
static inline void mca_btl_vader_progress_sends (void)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag, *next;
|
||||
|
||||
/* NOTE(review): the _SAFE iterator presumably tolerates removal of the
 * current item during traversal (it carries a separate "next") -- confirm */
OPAL_LIST_FOREACH_SAFE(frag, next, &mca_btl_vader_component.active_sends, mca_btl_vader_frag_t) {
|
||||
if (OPAL_LIKELY(frag->hdr->complete)) {
|
||||
opal_list_remove_item (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);
|
||||
|
||||
mca_btl_vader_frag_complete (frag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static int mca_btl_vader_component_progress (void)
|
||||
{
|
||||
int my_smp_rank = mca_btl_vader_component.my_smp_rank;
|
||||
mca_btl_active_message_callback_t *reg;
|
||||
mca_btl_vader_frag_t frag;
|
||||
mca_btl_vader_hdr_t *hdr;
|
||||
mca_btl_vader_frag_t frag = {.base = {.des_dst = frag.segments, .des_dst_cnt = 1}};
|
||||
const int my_smp_rank = MCA_BTL_VADER_LOCAL_RANK;
|
||||
mca_mpool_base_registration_t *xpmem_reg = NULL;
|
||||
const mca_btl_active_message_callback_t *reg;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
|
||||
/* check active sends for completion */
|
||||
mca_btl_vader_progress_sends ();
|
||||
mca_btl_vader_hdr_t *hdr;
|
||||
int fifo_count;
|
||||
|
||||
/* check for messages in fast boxes */
|
||||
mca_btl_vader_check_fboxes ();
|
||||
|
||||
/* poll the fifo once */
|
||||
hdr = vader_fifo_read (mca_btl_vader_component.endpoints[my_smp_rank].fifo);
|
||||
if (NULL == hdr) {
|
||||
return 0;
|
||||
/* poll the fifo until it is empty or a limit has been hit (8 is arbitrary) */
|
||||
for (fifo_count = 0 ; fifo_count < 8 ; ++fifo_count) {
|
||||
hdr = vader_fifo_read (mca_btl_vader_component.endpoints[my_smp_rank].fifo);
|
||||
if (NULL == hdr) {
|
||||
return fifo_count;
|
||||
}
|
||||
|
||||
if (hdr->flags & MCA_BTL_VADER_FLAG_COMPLETE) {
|
||||
mca_btl_vader_frag_complete (hdr->frag);
|
||||
continue;
|
||||
}
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||
frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
|
||||
frag.segments[0].seg_len = hdr->len;
|
||||
|
||||
endpoint = mca_btl_vader_component.endpoints + hdr->src_smp_rank;
|
||||
|
||||
if (hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) {
|
||||
xpmem_reg = vader_get_registation (endpoint, hdr->sc_iov.iov_base,
|
||||
hdr->sc_iov.iov_len, 0,
|
||||
&frag.segments[1].seg_addr.pval);
|
||||
|
||||
frag.segments[1].seg_len = hdr->sc_iov.iov_len;
|
||||
|
||||
/* recv upcall */
|
||||
frag.base.des_dst_cnt = 2;
|
||||
reg->cbfunc(&mca_btl_vader.super, hdr->tag, &(frag.base), reg->cbdata);
|
||||
frag.base.des_dst_cnt = 1;
|
||||
vader_return_registration (xpmem_reg, endpoint);
|
||||
} else {
|
||||
reg->cbfunc(&mca_btl_vader.super, hdr->tag, &(frag.base), reg->cbdata);
|
||||
}
|
||||
|
||||
/* return the fragment */
|
||||
hdr->flags = MCA_BTL_VADER_FLAG_COMPLETE;
|
||||
vader_fifo_write_back (hdr, endpoint);
|
||||
}
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||
frag.base.des_dst = frag.segments;
|
||||
frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
|
||||
frag.segments[0].seg_len = hdr->len;
|
||||
|
||||
if (OPAL_UNLIKELY(hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY)) {
|
||||
endpoint = mca_btl_vader_component.endpoints + hdr->my_smp_rank;
|
||||
xpmem_reg = vader_get_registation (endpoint, hdr->sc_iov.iov_base,
|
||||
hdr->sc_iov.iov_len, 0);
|
||||
|
||||
frag.segments[1].seg_addr.pval = vader_reg_to_ptr (xpmem_reg, hdr->sc_iov.iov_base);
|
||||
frag.segments[1].seg_len = hdr->sc_iov.iov_len;
|
||||
|
||||
/* recv upcall */
|
||||
frag.base.des_dst_cnt = 2;
|
||||
reg->cbfunc(&mca_btl_vader.super, hdr->tag, &(frag.base), reg->cbdata);
|
||||
vader_return_registration (xpmem_reg, endpoint);
|
||||
} else {
|
||||
frag.base.des_dst_cnt = 1;
|
||||
reg->cbfunc(&mca_btl_vader.super, hdr->tag, &(frag.base), reg->cbdata);
|
||||
}
|
||||
|
||||
/* return the fragment */
|
||||
hdr->complete = true;
|
||||
|
||||
return 1;
|
||||
return fifo_count;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -14,38 +14,51 @@
|
||||
|
||||
#include "btl_vader.h"
|
||||
#include "btl_vader_endpoint.h"
|
||||
#include "btl_vader_xpmem.h"
|
||||
|
||||
/* XXX -- FIXME -- make no assumptions if possible */
|
||||
/* Assumptions: page size: 4096, cache line: 64 or 128 bytes, tag = 1 byte */
|
||||
#define FBOX_SIZE 128 /* 2-4 cache lines */
|
||||
#define LAST_FBOX 31 /* page size assumtion: 4096 */
|
||||
#define MAX_MSG 126 /* 1 byte used each for size and tag */
|
||||
/* these hard-coded settings are based on the ideal setup for an Opteron 61xx chip and
|
||||
* may need to be adjusted for other systems. adding an MCA variable is possible but
|
||||
* can cost 20-40 ns on the fast path. this size is limited to 256 maximum bytes */
|
||||
#define MCA_BTL_VADER_FBOX_SIZE 64
|
||||
/* there should be a power of two number of fast boxes to simplify the math in the
|
||||
* critical path */
|
||||
#define MCA_BTL_VADER_LAST_FBOX 63
|
||||
/* two bytes are reserved for tag and size */
|
||||
#define MCA_BTL_VADER_FBOX_MAX_SIZE (MCA_BTL_VADER_FBOX_SIZE - 2)
|
||||
/* total size of all the fast boxes assigned to a particular peer */
|
||||
#define MCA_BTL_VADER_FBOX_PEER_SIZE (MCA_BTL_VADER_FBOX_SIZE * (MCA_BTL_VADER_LAST_FBOX + 1))
|
||||
|
||||
enum {MCA_BTL_VADER_FBOX_FREE = 0xfe, MCA_BTL_VADER_FBOX_RESERVED = 0xff};
|
||||
enum {MCA_BTL_VADER_FBOX_FREE = 0, MCA_BTL_VADER_FBOX_RESERVED = 0x80};
|
||||
|
||||
#define MCA_BTL_VADER_FBOX_OUT_PTR(ep, fbox) ((ep)->fbox_out + FBOX_SIZE * (fbox))
|
||||
#define MCA_BTL_VADER_FBOX_IN_PTR(ep, fbox) ((ep)->fbox_in + FBOX_SIZE * (fbox))
|
||||
#define MCA_BTL_VADER_FBOX_OUT_PTR(ep, fbox) ((ep)->fbox_out + MCA_BTL_VADER_FBOX_SIZE * (fbox))
|
||||
#define MCA_BTL_VADER_FBOX_IN_PTR(ep, fbox) ((ep)->fbox_in + MCA_BTL_VADER_FBOX_SIZE * (fbox))
|
||||
#define MCA_BTL_VADER_NEXT_FBOX(fbox) (((fbox) + 1) & MCA_BTL_VADER_LAST_FBOX)
|
||||
|
||||
static inline unsigned char *mca_btl_vader_reserve_fbox (struct mca_btl_base_endpoint_t *ep, size_t size)
|
||||
static inline unsigned char * restrict mca_btl_vader_reserve_fbox (struct mca_btl_base_endpoint_t *ep, const size_t size)
|
||||
{
|
||||
int next_fbox = ep->next_fbox_out;
|
||||
unsigned char *fbox = (unsigned char *) MCA_BTL_VADER_FBOX_OUT_PTR(ep, next_fbox);
|
||||
const int next_fbox = ep->next_fbox_out;
|
||||
unsigned char * restrict fbox = (unsigned char * restrict) MCA_BTL_VADER_FBOX_OUT_PTR(ep, next_fbox);
|
||||
|
||||
/* todo -- need thread locks/atomics here for the multi-threaded case */
|
||||
if (OPAL_UNLIKELY(size > MAX_MSG || fbox[0] != MCA_BTL_VADER_FBOX_FREE)) {
|
||||
/* fall back on fifo */
|
||||
return NULL;
|
||||
if (OPAL_LIKELY(size <= MCA_BTL_VADER_FBOX_MAX_SIZE && fbox[0] == MCA_BTL_VADER_FBOX_FREE)) {
|
||||
/* mark this fast box as in use */
|
||||
fbox[0] = MCA_BTL_VADER_FBOX_RESERVED;
|
||||
|
||||
ep->next_fbox_out = MCA_BTL_VADER_NEXT_FBOX(next_fbox);
|
||||
return fbox + 2;
|
||||
} else if (OPAL_LIKELY(size <= (MCA_BTL_VADER_FBOX_MAX_SIZE + MCA_BTL_VADER_FBOX_SIZE) && MCA_BTL_VADER_LAST_FBOX != next_fbox &&
|
||||
MCA_BTL_VADER_FBOX_FREE == fbox[0] && MCA_BTL_VADER_FBOX_FREE == fbox[MCA_BTL_VADER_FBOX_SIZE])) {
|
||||
/* aggregate two fast boxes */
|
||||
fbox[0] = MCA_BTL_VADER_FBOX_RESERVED;
|
||||
ep->next_fbox_out = MCA_BTL_VADER_NEXT_FBOX(next_fbox + 1);
|
||||
return fbox + 2;
|
||||
}
|
||||
|
||||
ep->next_fbox_out = (next_fbox + 1) & LAST_FBOX;
|
||||
|
||||
/* mark this fast box as in use */
|
||||
fbox[0] = MCA_BTL_VADER_FBOX_RESERVED;
|
||||
|
||||
return fbox + 2;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void mca_btl_vader_fbox_send (unsigned char *fbox, unsigned char tag, size_t size)
|
||||
static inline void mca_btl_vader_fbox_send (unsigned char * restrict fbox, unsigned char tag,
|
||||
size_t size)
|
||||
{
|
||||
fbox[-1] = tag;
|
||||
|
||||
@ -56,20 +69,20 @@ static inline void mca_btl_vader_fbox_send (unsigned char *fbox, unsigned char t
|
||||
}
|
||||
|
||||
static inline int mca_btl_vader_fbox_sendi (struct mca_btl_base_endpoint_t *endpoint, char tag,
|
||||
void *header, size_t header_size,
|
||||
void *payload, size_t payload_size)
|
||||
void * restrict header, const size_t header_size,
|
||||
void * restrict payload, const size_t payload_size)
|
||||
{
|
||||
unsigned char *fbox;
|
||||
unsigned char * restrict fbox;
|
||||
|
||||
fbox = mca_btl_vader_reserve_fbox(endpoint, header_size + payload_size);
|
||||
if (OPAL_UNLIKELY(NULL == fbox)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
memcpy (fbox, header, header_size);
|
||||
memmove (fbox, header, header_size);
|
||||
if (OPAL_UNLIKELY(payload)) {
|
||||
/* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
|
||||
memcpy (fbox + header_size, payload, payload_size);
|
||||
memmove (fbox + header_size, payload, payload_size);
|
||||
}
|
||||
|
||||
/* mark the fbox as sent */
|
||||
@ -81,40 +94,40 @@ static inline int mca_btl_vader_fbox_sendi (struct mca_btl_base_endpoint_t *endp
|
||||
|
||||
static inline void mca_btl_vader_check_fboxes (void)
|
||||
{
|
||||
int my_smp_rank = mca_btl_vader_component.my_smp_rank;
|
||||
mca_btl_active_message_callback_t *reg;
|
||||
mca_btl_vader_frag_t frag = {.base = {.des_dst = frag.segments, .des_dst_cnt = 1}};
|
||||
const int num_smp_procs = MCA_BTL_VADER_NUM_LOCAL_PEERS + 1;
|
||||
const mca_btl_active_message_callback_t *reg;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
unsigned char size, tag, *fbox;
|
||||
mca_btl_vader_frag_t frag;
|
||||
unsigned char * restrict fbox;
|
||||
int i, next_fbox;
|
||||
|
||||
for (i = 0 ; i < mca_btl_vader_component.num_smp_procs ; ++i) {
|
||||
if (my_smp_rank == i) {
|
||||
for (i = 0, endpoint = mca_btl_vader_component.endpoints ; i < num_smp_procs ; ++i, ++endpoint) {
|
||||
if (NULL == endpoint->fbox_in) {
|
||||
continue;
|
||||
}
|
||||
|
||||
endpoint = mca_btl_vader_component.endpoints + i;
|
||||
next_fbox = endpoint->next_fbox_in;
|
||||
fbox = (unsigned char *) MCA_BTL_VADER_FBOX_IN_PTR(endpoint, next_fbox);
|
||||
|
||||
/* process all fast-box messages */
|
||||
while (0xfe != ((size = fbox[0]) & 0xfe)) {
|
||||
opal_atomic_rmb ();
|
||||
while ((frag.segments[0].seg_len = fbox[0]) & 0x7f) {
|
||||
const unsigned char tag = fbox[1];
|
||||
|
||||
tag = fbox[1];
|
||||
opal_atomic_rmb ();
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + tag;
|
||||
|
||||
frag.segments[0].seg_addr.pval = fbox + 2;
|
||||
frag.segments[0].seg_len = size;
|
||||
|
||||
frag.base.des_dst = frag.segments;
|
||||
frag.base.des_dst_cnt = 1;
|
||||
reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata);
|
||||
|
||||
if (fbox[0] > MCA_BTL_VADER_FBOX_MAX_SIZE) {
|
||||
fbox[MCA_BTL_VADER_FBOX_SIZE] = MCA_BTL_VADER_FBOX_FREE;
|
||||
++next_fbox;
|
||||
}
|
||||
fbox[0] = MCA_BTL_VADER_FBOX_FREE;
|
||||
|
||||
next_fbox = next_fbox == LAST_FBOX ? 0 : next_fbox + 1;
|
||||
next_fbox = MCA_BTL_VADER_NEXT_FBOX(next_fbox);
|
||||
fbox = (unsigned char *) MCA_BTL_VADER_FBOX_IN_PTR(endpoint, next_fbox);
|
||||
}
|
||||
|
||||
@ -123,3 +136,11 @@ static inline void mca_btl_vader_check_fboxes (void)
|
||||
}
|
||||
|
||||
#endif /* !defined(MCA_BTL_VADER_FBOX_H) */
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2007 Voltaire. All rights reserved.
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2010-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -61,12 +61,9 @@ static inline int vader_fifo_init (vader_fifo_t *fifo)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static inline void vader_fifo_write (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *ep)
|
||||
static inline void _vader_fifo_write (vader_fifo_t *fifo, int64_t value)
|
||||
{
|
||||
vader_fifo_t *fifo = ep->fifo;
|
||||
int64_t prev, value = virtual2relative ((char *) hdr);
|
||||
|
||||
hdr->next = VADER_FIFO_FREE;
|
||||
int64_t prev;
|
||||
|
||||
opal_atomic_wmb ();
|
||||
prev = opal_atomic_swap_64 (&fifo->fifo_tail, value);
|
||||
@ -75,7 +72,7 @@ static inline void vader_fifo_write (mca_btl_vader_hdr_t *hdr, struct mca_btl_ba
|
||||
assert (prev != value);
|
||||
|
||||
if (OPAL_LIKELY(VADER_FIFO_FREE != prev)) {
|
||||
hdr = (mca_btl_vader_hdr_t *) relative2virtual (prev);
|
||||
mca_btl_vader_hdr_t *hdr = (mca_btl_vader_hdr_t *) relative2virtual (prev);
|
||||
hdr->next = value;
|
||||
} else {
|
||||
fifo->fifo_head = value;
|
||||
@ -84,6 +81,20 @@ static inline void vader_fifo_write (mca_btl_vader_hdr_t *hdr, struct mca_btl_ba
|
||||
opal_atomic_wmb ();
|
||||
}
|
||||
|
||||
/* Write a frag (relative to this process' base) to another rank's fifo.
 * hdr->next is reset to VADER_FIFO_FREE, then the header address is
 * converted with virtual2relative() and handed to _vader_fifo_write() on
 * ep->fifo. */
|
||||
static inline void vader_fifo_write (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
hdr->next = VADER_FIFO_FREE;
|
||||
_vader_fifo_write (ep->fifo, virtual2relative ((char *) hdr));
|
||||
}
|
||||
|
||||
/* Write a frag (relative to the remote process' base) to the remote fifo.
 * Note: the remote peer must own hdr. hdr->next is reset to
 * VADER_FIFO_FREE, then the header address is converted with
 * virtual2relativepeer() for this endpoint and handed to
 * _vader_fifo_write() on ep->fifo. */
|
||||
static inline void vader_fifo_write_back (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
hdr->next = VADER_FIFO_FREE;
|
||||
_vader_fifo_write(ep->fifo, virtual2relativepeer (ep, (char *) hdr));
|
||||
}
|
||||
|
||||
static inline mca_btl_vader_hdr_t *vader_fifo_read (vader_fifo_t *fifo)
|
||||
{
|
||||
mca_btl_vader_hdr_t *hdr;
|
||||
|
@ -29,7 +29,9 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag)
|
||||
{
|
||||
frag->hdr = (mca_btl_vader_hdr_t*)frag->base.super.ptr;
|
||||
if(frag->hdr != NULL) {
|
||||
frag->hdr->my_smp_rank = mca_btl_vader_component.my_smp_rank;
|
||||
frag->hdr->src_smp_rank = MCA_BTL_VADER_LOCAL_RANK;
|
||||
frag->hdr->frag = frag;
|
||||
frag->segments[0].seg_addr.pval = (char *)(frag->hdr + 1);
|
||||
}
|
||||
|
||||
frag->base.des_src = frag->segments;
|
||||
@ -59,8 +61,10 @@ void mca_btl_vader_frag_return (mca_btl_vader_frag_t *frag)
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = frag->segments;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->hdr->flags = 0;
|
||||
frag->segments[0].seg_addr.pval = (char *)(frag->hdr + 1);
|
||||
|
||||
OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t *)frag);
|
||||
OMPI_FREE_LIST_RETURN(frag->my_list, (ompi_free_list_item_t *)frag);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_vader_frag_t, mca_btl_base_descriptor_t,
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -29,13 +29,16 @@
|
||||
#define MCA_BTL_VADER_FLAG_INLINE 0
|
||||
#define MCA_BTL_VADER_FLAG_SINGLE_COPY 1
|
||||
#define MCA_BTL_VADER_FLAG_FBOX 2
|
||||
#define MCA_BTL_VADER_FLAG_COMPLETE 4
|
||||
|
||||
struct mca_btl_vader_frag_t;
|
||||
|
||||
struct mca_btl_vader_hdr_t {
|
||||
volatile intptr_t next; /* next item in fifo. many peers may touch this */
|
||||
volatile bool complete; /* fragment completion (usually 1 byte) */
|
||||
struct mca_btl_vader_frag_t *frag;
|
||||
mca_btl_base_tag_t tag; /* tag associated with this fragment (used to lookup callback) */
|
||||
int flags; /* vader send flags */
|
||||
int my_smp_rank; /* smp rank of owning process */
|
||||
uint8_t flags; /* vader send flags */
|
||||
int src_smp_rank; /* smp rank of owning process */
|
||||
size_t len; /* length of data following this header */
|
||||
struct iovec sc_iov; /* io vector containing pointer to single-copy data */
|
||||
};
|
||||
@ -58,23 +61,22 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_frag_t);
|
||||
|
||||
static inline int mca_btl_vader_frag_alloc (mca_btl_vader_frag_t **frag, ompi_free_list_t *list) {
|
||||
ompi_free_list_item_t *item;
|
||||
int rc;
|
||||
|
||||
OMPI_FREE_LIST_GET_MT(list, item);
|
||||
OMPI_FREE_LIST_GET(list, item, rc);
|
||||
*frag = (mca_btl_vader_frag_t *) item;
|
||||
if (OPAL_LIKELY(NULL != item)) {
|
||||
if (NULL == (*frag)->hdr) {
|
||||
OMPI_FREE_LIST_RETURN_MT(list, (ompi_free_list_item_t *)*frag);
|
||||
OMPI_FREE_LIST_RETURN(list, (ompi_free_list_item_t *)*frag);
|
||||
*frag = NULL;
|
||||
return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
(*frag)->hdr->complete = false;
|
||||
(*frag)->hdr->flags = MCA_BTL_VADER_FLAG_INLINE;
|
||||
(*frag)->segments[0].seg_addr.pval = (char *)((*frag)->hdr + 1);
|
||||
(*frag)->my_list = list;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
||||
void mca_btl_vader_frag_return (mca_btl_vader_frag_t *frag);
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "btl_vader.h"
|
||||
#include "btl_vader_frag.h"
|
||||
#include "btl_vader_endpoint.h"
|
||||
#include "btl_vader_xpmem.h"
|
||||
|
||||
/**
|
||||
* Initiate a synchronous get.
|
||||
@ -29,19 +30,16 @@ int mca_btl_vader_get (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
|
||||
mca_btl_base_segment_t *src = des->des_src;
|
||||
mca_btl_base_segment_t *dst = des->des_dst;
|
||||
size_t size = min(dst->seg_len, src->seg_len);
|
||||
const size_t size = min(dst->seg_len, src->seg_len);
|
||||
mca_mpool_base_registration_t *reg;
|
||||
void *rem_ptr;
|
||||
|
||||
reg = vader_get_registation (endpoint, (void *)(uintptr_t) src->seg_addr.lval,
|
||||
src->seg_len, 0);
|
||||
if (OPAL_UNLIKELY(NULL == reg)) {
|
||||
reg = vader_get_registation (endpoint, src->seg_addr.pval, src->seg_len, 0, &rem_ptr);
|
||||
if (OPAL_UNLIKELY(NULL == rem_ptr)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
rem_ptr = vader_reg_to_ptr (reg, (void *)(uintptr_t) src->seg_addr.lval);
|
||||
|
||||
vader_memmove ((void *)(uintptr_t) dst->seg_addr.lval, rem_ptr, size);
|
||||
vader_memmove (dst->seg_addr.pval, rem_ptr, size);
|
||||
|
||||
vader_return_registration (reg, endpoint);
|
||||
|
||||
|
@ -108,25 +108,10 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
|
||||
/* generate the endpoints */
|
||||
component->endpoints = (struct mca_btl_base_endpoint_t *) calloc (n, sizeof (struct mca_btl_base_endpoint_t));
|
||||
|
||||
component->segment_offset = (n + 1) * 4096;
|
||||
component->segment_offset = (n - 1) * MCA_BTL_VADER_FBOX_PEER_SIZE + 4096;
|
||||
|
||||
/* initialize fragment descriptor free lists */
|
||||
/* initialize free list for send fragments */
|
||||
rc = ompi_free_list_init_ex_new(&component->vader_frags_eager,
|
||||
sizeof (mca_btl_vader_frag_t),
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
|
||||
0, opal_cache_line_size,
|
||||
component->vader_free_list_num,
|
||||
component->vader_free_list_max,
|
||||
component->vader_free_list_inc,
|
||||
NULL, mca_btl_vader_frag_init,
|
||||
(void *) (sizeof (mca_btl_vader_hdr_t) +
|
||||
mca_btl_vader.super.btl_eager_limit));
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* initialize free list for put/get fragments */
|
||||
/* initialize free list for put/get/single copy/inline fragments */
|
||||
rc = ompi_free_list_init_ex_new(&component->vader_frags_user,
|
||||
sizeof(mca_btl_vader_frag_t),
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
|
||||
@ -141,6 +126,21 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* initialize free list for buffered send fragments */
|
||||
rc = ompi_free_list_init_ex_new(&component->vader_frags_eager,
|
||||
sizeof (mca_btl_vader_frag_t),
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
|
||||
0, opal_cache_line_size,
|
||||
component->vader_free_list_num,
|
||||
component->vader_free_list_max,
|
||||
component->vader_free_list_inc,
|
||||
NULL, mca_btl_vader_frag_init,
|
||||
(void *) (sizeof (mca_btl_vader_hdr_t) +
|
||||
mca_btl_vader.super.btl_eager_limit));
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* set flag indicating btl has been inited */
|
||||
vader_btl->btl_inited = true;
|
||||
|
||||
@ -148,36 +148,40 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
|
||||
}
|
||||
|
||||
|
||||
static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct ompi_proc_t *proc, int local_rank) {
|
||||
static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct ompi_proc_t *proc, int remote_rank) {
|
||||
const int fbox_in_offset = MCA_BTL_VADER_LOCAL_RANK - (MCA_BTL_VADER_LOCAL_RANK > remote_rank);
|
||||
const int fbox_out_offset = remote_rank - (MCA_BTL_VADER_LOCAL_RANK < remote_rank);
|
||||
mca_btl_vader_component_t *component = &mca_btl_vader_component;
|
||||
struct vader_modex_t *modex;
|
||||
size_t msg_size;
|
||||
int rc;
|
||||
|
||||
ep->peer_smp_rank = local_rank;
|
||||
ep->peer_smp_rank = remote_rank;
|
||||
|
||||
if (OMPI_SUCCESS != (rc = ompi_modex_recv(&component->super.btl_version,
|
||||
proc, (void *)&modex, &msg_size))) {
|
||||
return rc;
|
||||
if (remote_rank != MCA_BTL_VADER_LOCAL_RANK) {
|
||||
if (OMPI_SUCCESS != (rc = ompi_modex_recv(&component->super.btl_version,
|
||||
proc, (void *)&modex, &msg_size))) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
ep->apid = xpmem_get (modex->seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
|
||||
ep->rcache = mca_rcache_base_module_create("vma");
|
||||
ep->next_fbox_out = 0;
|
||||
ep->next_fbox_in = 0;
|
||||
|
||||
/* attach to the remote segment */
|
||||
(void) vader_get_registation (ep, modex->segment_base, mca_btl_vader_component.segment_size,
|
||||
MCA_MPOOL_FLAGS_PERSIST, (void **) &ep->segment_base);
|
||||
|
||||
ep->fifo = (struct vader_fifo_t *) ep->segment_base;
|
||||
ep->fbox_in = ep->segment_base + 4096 + fbox_in_offset * MCA_BTL_VADER_FBOX_PEER_SIZE;
|
||||
ep->fbox_out = component->my_segment + 4096 + fbox_out_offset * MCA_BTL_VADER_FBOX_PEER_SIZE;
|
||||
} else {
|
||||
/* set up the segment base so we can calculate a virtual to real for local pointers */
|
||||
ep->segment_base = component->my_segment;
|
||||
ep->fifo = (struct vader_fifo_t *) ep->segment_base;
|
||||
}
|
||||
|
||||
ep->apid = xpmem_get (modex->seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
|
||||
ep->rcache = mca_rcache_base_module_create("vma");
|
||||
ep->next_fbox_out = 0;
|
||||
ep->next_fbox_in = 0;
|
||||
|
||||
/* Attach to the remote process' segment */
|
||||
ep->segment_base =
|
||||
vader_reg_to_ptr (vader_get_registation (ep, modex->segment_base, mca_btl_vader_component.segment_size,
|
||||
MCA_MPOOL_FLAGS_PERSIST),
|
||||
modex->segment_base);
|
||||
|
||||
ep->fifo = (struct vader_fifo_t *) ep->segment_base;
|
||||
ep->fbox_out = ep->segment_base + (1 + component->my_smp_rank) * 4096;
|
||||
ep->fbox_in = component->my_segment + (1 + local_rank) * 4096;
|
||||
|
||||
memset (ep->fbox_in, MCA_BTL_VADER_FBOX_FREE, 4096);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -202,10 +206,9 @@ static int vader_add_procs (struct mca_btl_base_module_t* btl,
|
||||
{
|
||||
mca_btl_vader_component_t *component = &mca_btl_vader_component;
|
||||
mca_btl_vader_t *vader_btl = (mca_btl_vader_t *) btl;
|
||||
int32_t n_local_procs = 0, proc, local_rank, my_smp_rank = -1;
|
||||
bool have_connected_peer = false;
|
||||
int32_t proc, local_rank;
|
||||
ompi_proc_t *my_proc;
|
||||
int rc = OMPI_SUCCESS;
|
||||
int rc;
|
||||
|
||||
/* initialization */
|
||||
|
||||
@ -214,52 +217,24 @@ static int vader_add_procs (struct mca_btl_base_module_t* btl,
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Get unique host identifier for each process in the list,
|
||||
* and identify procs that are on this host. Add procs on this
|
||||
* host to shared memory reachability list. Also, get number
|
||||
* of local procs in the procs list. */
|
||||
for (proc = 0; proc < (int32_t) nprocs; ++proc) {
|
||||
/* check to see if this proc can be reached via shmem (i.e.,
|
||||
if they're on my local host and in my job) */
|
||||
if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ||
|
||||
!OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
n_local_procs++;
|
||||
|
||||
if (my_proc != procs[proc]) {
|
||||
/* we have someone to talk to */
|
||||
have_connected_peer = true;
|
||||
|
||||
/* add this proc to shared memory accessibility list */
|
||||
rc = opal_bitmap_set_bit (reachability, proc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
my_smp_rank = mca_btl_vader_component.my_smp_rank = n_local_procs - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* jump out if there's not someone we can talk to */
|
||||
if (!have_connected_peer) {
|
||||
if (1 > MCA_BTL_VADER_NUM_LOCAL_PEERS) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* make sure that my_smp_rank has been defined */
|
||||
if(-1 == my_smp_rank) {
|
||||
return OMPI_ERROR;
|
||||
/* make sure that my local rank has been defined */
|
||||
if (ORTE_LOCAL_RANK_INVALID == MCA_BTL_VADER_LOCAL_RANK) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (!vader_btl->btl_inited) {
|
||||
rc = vader_btl_first_time_init (vader_btl, n_local_procs);
|
||||
rc = vader_btl_first_time_init (vader_btl, 1 + MCA_BTL_VADER_NUM_LOCAL_PEERS);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
for (proc = 0, local_rank = 0; proc < (int32_t) nprocs; ++proc) {
|
||||
for (proc = 0, local_rank = 0 ; proc < (int32_t) nprocs ; ++proc) {
|
||||
/* check to see if this proc can be reached via shmem (i.e.,
|
||||
if they're on my local host and in my job) */
|
||||
if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ||
|
||||
@ -268,19 +243,19 @@ static int vader_add_procs (struct mca_btl_base_module_t* btl,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (my_proc != procs[proc]) {
|
||||
/* add this proc to shared memory accessibility list */
|
||||
rc = opal_bitmap_set_bit (reachability, proc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* setup endpoint */
|
||||
peers[proc] = component->endpoints + local_rank;
|
||||
init_vader_endpoint (peers[proc], procs[proc], local_rank++);
|
||||
|
||||
/* check to see if this is me */
|
||||
if (my_proc == procs[proc]) {
|
||||
peers[proc] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* update the local smp process count */
|
||||
component->num_smp_procs += n_local_procs;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -392,7 +367,7 @@ struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t
|
||||
return NULL;
|
||||
}
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, (void **) &data_ptr);
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
frag->segments[0].seg_len = *size;
|
||||
@ -417,6 +392,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
uint8_t order, size_t reserve, size_t *size,
|
||||
uint32_t flags)
|
||||
{
|
||||
const size_t total_size = reserve + *size;
|
||||
struct iovec iov;
|
||||
mca_btl_vader_frag_t *frag;
|
||||
uint32_t iov_count = 1;
|
||||
@ -445,15 +421,14 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segments[0].seg_len = reserve + *size;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->segments[0].seg_len = total_size;
|
||||
} else {
|
||||
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((*size + reserve) > (size_t) mca_btl_vader_component.max_inline_send) {
|
||||
if (total_size > (size_t) mca_btl_vader_component.max_inline_send) {
|
||||
/* single copy send */
|
||||
frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;
|
||||
|
||||
@ -467,9 +442,8 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
frag->base.des_src_cnt = 2;
|
||||
} else {
|
||||
/* inline send */
|
||||
|
||||
/* try to reserve a fast box for this transfer */
|
||||
fbox_ptr = mca_btl_vader_reserve_fbox (endpoint, reserve + *size);
|
||||
fbox_ptr = mca_btl_vader_reserve_fbox (endpoint, total_size);
|
||||
|
||||
if (fbox_ptr) {
|
||||
frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
|
||||
@ -479,8 +453,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
/* NTH: the convertor adds some latency so we bypass it here */
|
||||
vader_memmove ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve),
|
||||
data_ptr, *size);
|
||||
frag->segments[0].seg_len = reserve + *size;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->segments[0].seg_len = total_size;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -491,11 +464,9 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
}
|
||||
|
||||
frag->segments[0].seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
|
||||
frag->segments[0].seg_len = reserve + *size;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->segments[0].seg_len = total_size;
|
||||
}
|
||||
|
||||
frag->base.des_src = frag->segments;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "btl_vader.h"
|
||||
#include "btl_vader_frag.h"
|
||||
#include "btl_vader_endpoint.h"
|
||||
#include "btl_vader_xpmem.h"
|
||||
|
||||
/**
|
||||
* Initiate a synchronous put.
|
||||
@ -29,19 +30,16 @@ int mca_btl_vader_put (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
|
||||
mca_btl_base_segment_t *src = des->des_src;
|
||||
mca_btl_base_segment_t *dst = des->des_dst;
|
||||
size_t size = min(dst->seg_len, src->seg_len);
|
||||
const size_t size = min(dst->seg_len, src->seg_len);
|
||||
mca_mpool_base_registration_t *reg;
|
||||
void *rem_ptr;
|
||||
|
||||
reg = vader_get_registation (endpoint, (void *)(uintptr_t) dst->seg_addr.lval,
|
||||
dst->seg_len, 0);
|
||||
reg = vader_get_registation (endpoint, dst->seg_addr.pval, dst->seg_len, 0, &rem_ptr);
|
||||
if (OPAL_UNLIKELY(NULL == reg)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
rem_ptr = vader_reg_to_ptr (reg, (void *)(uintptr_t) dst->seg_addr.lval);
|
||||
|
||||
vader_memmove (rem_ptr, (void *)(uintptr_t) src->seg_addr.lval, size);
|
||||
vader_memmove (rem_ptr, src->seg_addr.pval, size);
|
||||
|
||||
vader_return_registration (reg, endpoint);
|
||||
|
||||
|
@ -41,9 +41,8 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
|
||||
|
||||
if (frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX) {
|
||||
if (OPAL_LIKELY(frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX)) {
|
||||
mca_btl_vader_fbox_send (frag->segments[0].seg_addr.pval, tag, frag->segments[0].seg_len);
|
||||
|
||||
mca_btl_vader_frag_complete (frag);
|
||||
|
||||
return 1;
|
||||
@ -54,12 +53,10 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
|
||||
/* type of message, pt-2-pt, one-sided, etc */
|
||||
frag->hdr->tag = tag;
|
||||
|
||||
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);
|
||||
|
||||
/* post the relative address of the descriptor into the peer's fifo */
|
||||
vader_fifo_write (frag->hdr, endpoint);
|
||||
|
||||
if (frag->hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY ||
|
||||
if ((frag->hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) ||
|
||||
!(frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
|
||||
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
|
||||
|
@ -53,20 +53,18 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
assert (length < mca_btl_vader.super.btl_eager_limit);
|
||||
assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));
|
||||
|
||||
if (payload_size) {
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
}
|
||||
|
||||
if (!opal_convertor_need_buffers (convertor) &&
|
||||
mca_btl_vader_fbox_sendi (endpoint, tag, header, header_size, data_ptr, payload_size)) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* we won't ever return a descriptor */
|
||||
*descriptor = NULL;
|
||||
|
||||
if (OPAL_LIKELY((payload_size + header_size) < mca_btl_vader_component.max_inline_send &&
|
||||
!opal_convertor_need_buffers (convertor))) {
|
||||
if (payload_size) {
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
}
|
||||
|
||||
if (mca_btl_vader_fbox_sendi (endpoint, tag, header, header_size, data_ptr, payload_size)) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
/* allocate a fragment, giving up if we can't get one */
|
||||
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, length,
|
||||
flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
@ -82,11 +80,8 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
memcpy (frag->segments[0].seg_addr.pval, header, header_size);
|
||||
|
||||
/* write the message data if there is any */
|
||||
/*
|
||||
We can add MEMCHECKER calls before and after the packing.
|
||||
*/
|
||||
/* we can't use single-copy semantics here since the caller will consider the send
|
||||
complete if we return success */
|
||||
complete when we return */
|
||||
if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) {
|
||||
/* pack the data into the supplied buffer */
|
||||
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
|
||||
@ -97,16 +92,11 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
assert (max_data == payload_size);
|
||||
} else if (payload_size) {
|
||||
/* bypassing the convertor may speed things up a little */
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size), data_ptr, payload_size);
|
||||
}
|
||||
|
||||
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);
|
||||
|
||||
/* write the fragment pointer to the peer's FIFO */
|
||||
/* write the fragment pointer to the peer's FIFO. The progress function will return the fragment */
|
||||
vader_fifo_write (frag->hdr, endpoint);
|
||||
|
||||
/* the progress function will return the fragment */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
#include "ompi/mca/btl/vader/btl_vader.h"
|
||||
#include "opal/include/opal/align.h"
|
||||
#include "btl_vader_xpmem.h"
|
||||
|
||||
/* largest address we can attach to using xpmem */
|
||||
#define VADER_MAX_ADDRESS ((uintptr_t)0x7ffffffff000)
|
||||
@ -18,7 +19,7 @@
|
||||
/* look up the remote pointer in the peer rcache and attach if
|
||||
* necessary */
|
||||
mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
|
||||
size_t size, int flags)
|
||||
size_t size, int flags, void **local_ptr)
|
||||
{
|
||||
struct mca_rcache_base_module_t *rcache = endpoint->rcache;
|
||||
mca_mpool_base_registration_t *regs[10], *reg = NULL;
|
||||
@ -26,9 +27,8 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
|
||||
uintptr_t base, bound;
|
||||
int rc, i;
|
||||
|
||||
if (OPAL_UNLIKELY(endpoint->peer_smp_rank == mca_btl_vader_component.my_smp_rank)) {
|
||||
return rem_ptr;
|
||||
}
|
||||
/* use btl/self for self communication */
|
||||
assert (endpoint->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK);
|
||||
|
||||
base = (uintptr_t) down_align_addr(rem_ptr, mca_btl_vader_component.log_attach_align);
|
||||
bound = (uintptr_t) up_align_addr((void *)((uintptr_t) rem_ptr + size - 1),
|
||||
@ -42,7 +42,8 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
|
||||
for (i = 0 ; i < rc ; ++i) {
|
||||
if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) {
|
||||
opal_atomic_add (&regs[i]->ref_count, 1);
|
||||
return regs[i];
|
||||
reg = regs[i];
|
||||
goto reg_found;
|
||||
}
|
||||
|
||||
if (regs[i]->flags & MCA_MPOOL_FLAGS_PERSIST) {
|
||||
@ -85,12 +86,17 @@ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoi
|
||||
reg->alloc_base = xpmem_attach (xpmem_addr, bound - base, NULL);
|
||||
if (OPAL_UNLIKELY((void *)-1 == reg->alloc_base)) {
|
||||
OBJ_RELEASE(reg);
|
||||
reg = NULL;
|
||||
} else {
|
||||
rcache->rcache_insert (rcache, reg, 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rcache->rcache_insert (rcache, reg, 0);
|
||||
}
|
||||
|
||||
reg_found:
|
||||
opal_atomic_wmb ();
|
||||
*local_ptr = (void *) ((uintptr_t) reg->alloc_base +
|
||||
(ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base));
|
||||
|
||||
return reg;
|
||||
}
|
||||
|
||||
@ -105,9 +111,3 @@ void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_b
|
||||
OBJ_RELEASE (reg);
|
||||
}
|
||||
}
|
||||
|
||||
void *vader_reg_to_ptr (mca_mpool_base_registration_t *reg, void *rem_ptr)
|
||||
{
|
||||
return (void *) ((uintptr_t) reg->alloc_base +
|
||||
(ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base));
|
||||
}
|
||||
|
24
ompi/mca/btl/vader/btl_vader_xpmem.h
Обычный файл
24
ompi/mca/btl/vader/btl_vader_xpmem.h
Обычный файл
@ -0,0 +1,24 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#if !defined(MCA_BTL_VADER_XPMEM_H)
|
||||
#define MCA_BTL_VADER_XPMEM_H
|
||||
|
||||
#include "btl_vader.h"
|
||||
|
||||
/* look up the remote pointer in the peer rcache and attach if
|
||||
* necessary */
|
||||
mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *endpoint, void *rem_ptr,
|
||||
size_t size, int flags, void **local_ptr);
|
||||
|
||||
void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *endpoint);
|
||||
|
||||
#endif
|
Загрузка…
x
Ссылка в новой задаче
Block a user