1
1
This commit was SVN r25376.
Этот коммит содержится в:
Nathan Hjelm 2011-10-27 20:22:46 +00:00
родитель f96ae43e23
Коммит ee087de073
11 изменённых файлов: 226 добавлений и 24 удалений

Просмотреть файл

@ -33,6 +33,7 @@ libmca_btl_vader_la_sources = \
btl_vader_frag.h \
btl_vader_send.c \
btl_vader_sendi.c \
btl_vader_fbox.h \
btl_vader_get.c \
btl_vader_put.c

Просмотреть файл

@ -30,6 +30,7 @@
#include "btl_vader.h"
#include "btl_vader_endpoint.h"
#include "btl_vader_fifo.h"
#include "btl_vader_fbox.h"
static int vader_del_procs (struct mca_btl_base_module_t *btl,
size_t nprocs, struct ompi_proc_t **procs,
@ -274,8 +275,8 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
component->shm_bases[component->my_smp_rank] = (char *)component->vader_mpool_base;
component->shm_seg_ids[component->my_smp_rank] = my_segid;
/* initialize the array of fifo's "owned" by this process */
posix_memalign ((void **)&my_fifos, getpagesize (), sizeof (vader_fifo_t));
/* initialize the fifo and fast boxes "owned" by this process */
posix_memalign ((void **)&my_fifos, getpagesize (), (n + 1) * getpagesize ());
if(NULL == my_fifos)
return OMPI_ERR_OUT_OF_RESOURCE;
@ -297,6 +298,22 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
if (NULL == component->xpmem_rcaches)
return OMPI_ERR_OUT_OF_RESOURCE;
component->vader_fboxes_in = (char **) calloc (n, sizeof (char *));
if (NULL == component->vader_fboxes_in)
return OMPI_ERR_OUT_OF_RESOURCE;
component->vader_fboxes_out = (char **) calloc (n, sizeof (char *));
if (NULL == component->vader_fboxes_out)
return OMPI_ERR_OUT_OF_RESOURCE;
component->vader_next_fbox_in = (unsigned char *) calloc (64, 1);
if (NULL == component->vader_next_fbox_in)
return OMPI_ERR_OUT_OF_RESOURCE;
component->vader_next_fbox_out = (unsigned char *) calloc (64, 1);
if (NULL == component->vader_next_fbox_out)
return OMPI_ERR_OUT_OF_RESOURCE;
/* initialize fragment descriptor free lists */
/* initialize free list for send fragments */
i = ompi_free_list_init_new(&component->vader_frags_eager,
@ -482,8 +499,19 @@ static int vader_add_procs (struct mca_btl_base_module_t* btl,
/* get a persistent pointer to the peer's fifo */
component->fifo[peer_smp_rank] =
vader_reg_to_ptr (vader_get_registation (peer_smp_rank, rem_ptr,
sizeof (vader_fifo_t),
(n_local_procs + 1) * getpagesize (),
MCA_MPOOL_FLAGS_PERSIST), rem_ptr);
/* fast boxes are allocated at the same time as the fifos */
component->vader_fboxes_in[peer_smp_rank] = (char *) component->fifo[my_smp_rank] +
(peer_smp_rank + 1) * getpagesize ();
component->vader_fboxes_out[peer_smp_rank] = (char *) component->fifo[peer_smp_rank] +
(my_smp_rank + 1) * getpagesize ();
component->vader_next_fbox_in[peer_smp_rank] = 0;
component->vader_next_fbox_out[peer_smp_rank] = 0;
memset (component->vader_fboxes_in[peer_smp_rank], MCA_BTL_VADER_FBOX_FREE, getpagesize());
}
}
@ -643,7 +671,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
struct iovec iov, *lcl_mem;
mca_btl_vader_frag_t *frag;
uint32_t iov_count = 1;
void *data_ptr;
void *data_ptr, *fbox_ptr;
int rc;
opal_convertor_get_current_pointer (convertor, &data_ptr);
@ -688,8 +716,18 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
frag->segment.seg_len = reserve;
} else {
/* inline send */
/* try to reserve a fast box for this transfer */
fbox_ptr = mca_btl_vader_reserve_fbox (endpoint->peer_smp_rank, reserve + *size);
if (fbox_ptr) {
frag->hdr->flags |= MCA_BTL_VADER_FLAG_FBOX;
frag->segment.seg_addr.pval = fbox_ptr;
}
/* NTH: the convertor adds some latency so we bypass it here */
memmove ((void *)((uintptr_t)frag->segment.seg_addr.pval + reserve), data_ptr, *size);
vader_memmove ((void *)((uintptr_t)frag->segment.seg_addr.pval + reserve),
data_ptr, *size);
frag->segment.seg_len = reserve + *size;
}
}

Просмотреть файл

@ -114,6 +114,12 @@ struct mca_btl_vader_component_t {
opal_list_t active_sends; /**< list of outstanding fragments */
char **vader_fboxes_in; /**< incomming fast boxes (memory belongs to this process) */
char **vader_fboxes_out; /**< outgoing fast boxes (memory belongs to remote peers) */
unsigned char *vader_next_fbox_in; /**< indices of fast boxes to poll */
unsigned char *vader_next_fbox_out; /**< indices of fast boxes to write */
struct mca_btl_base_endpoint_t **vader_peers;
};
typedef struct mca_btl_vader_component_t mca_btl_vader_component_t;
@ -256,6 +262,17 @@ static inline void *vader_reg_to_ptr (mca_mpool_base_registration_t *reg, void *
(ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base));
}
/* Copy helper: memcpy is measurably faster at larger sizes but has
 * undefined behavior when the source and destination regions overlap,
 * so fall back to memmove below the configured crossover point.
 * (TODO -- re-add an explicit alias check so large overlapping copies
 * are safe as well.)
 *
 * dst:  destination buffer
 * src:  source buffer (never written through; now const-qualified,
 *       which remains source-compatible for all existing callers)
 * size: number of bytes to copy */
static inline void vader_memmove (void *dst, const void *src, size_t size)
{
    if (size >= mca_btl_vader_memcpy_limit) {
        /* large copy: assume the regions do not alias */
        memcpy (dst, src, size);
    } else {
        memmove (dst, src, size);
    }
}
/**
* Initiate a send to the peer.
*

Просмотреть файл

@ -35,6 +35,7 @@
#include "btl_vader.h"
#include "btl_vader_frag.h"
#include "btl_vader_fifo.h"
#include "btl_vader_fbox.h"
static int mca_btl_vader_component_progress (void);
static int mca_btl_vader_component_open(void);
@ -286,6 +287,7 @@ static inline void mca_btl_vader_progress_sends (void)
}
}
static int mca_btl_vader_component_progress (void)
{
int my_smp_rank = mca_btl_vader_component.my_smp_rank;
@ -297,6 +299,9 @@ static int mca_btl_vader_component_progress (void)
mca_mpool_base_registration_t *xpmem_reg = NULL;
bool single_copy;
/* check for messages in fast boxes */
mca_btl_vader_check_fboxes ();
/* check active sends for completion */
mca_btl_vader_progress_sends ();

128
ompi/mca/btl/vader/btl_vader_fbox.h Обычный файл
Просмотреть файл

@ -0,0 +1,128 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#if !defined(MCA_BTL_VADER_FBOX_H)
#define MCA_BTL_VADER_FBOX_H
#include "btl_vader.h"
#include "btl_vader_endpoint.h"
/* XXX -- FIXME -- make no assumptions if possible */
/* Assumptions: page size: 4096, cache line: 64 or 128 bytes, tag = 1 byte */
/* Each peer pair shares one page of fast boxes: 32 boxes of 128 bytes.
 * Box layout: byte 0 = size/status, byte 1 = tag, bytes 2..127 = data. */
#define FBOX_SIZE 128 /* 2-4 cache lines */
#define LAST_FBOX 31 /* page size assumption: 4096 / 128 = 32 boxes (indices 0..31) */
#define MAX_MSG 126 /* 1 byte used each for size and tag */
/* The size/status byte doubles as a state marker: valid sizes are
 * 0..126, so 0xfe (free) and 0xff (reserved, data not yet published)
 * can never be confused with a real message size. */
enum {MCA_BTL_VADER_FBOX_FREE = 0xfe, MCA_BTL_VADER_FBOX_RESERVED = 0xff};
/* Address of fast box `fbox` used to SEND to peer_smp_rank (memory
 * owned by the remote peer -- see vader_fboxes_out). */
#define MCA_BTL_VADER_FBOX_OUT_PTR(peer_smp_rank, fbox) \
(mca_btl_vader_component.vader_fboxes_out[peer_smp_rank] + FBOX_SIZE * (fbox))
/* Address of fast box `fbox` used to RECEIVE from peer_smp_rank (memory
 * owned by this process -- see vader_fboxes_in). */
#define MCA_BTL_VADER_FBOX_IN_PTR(peer_smp_rank, fbox) \
(mca_btl_vader_component.vader_fboxes_in[peer_smp_rank] + FBOX_SIZE * (fbox))
/* Try to reserve the next outgoing fast box for a message of `size`
 * bytes destined for peer_smp_rank.
 *
 * Returns a pointer to the box's 126-byte data area (2 bytes past the
 * size/tag header), or NULL if the message is too large or the next
 * box in round-robin order is still in use -- the caller then falls
 * back on the fifo path. */
static inline unsigned char *mca_btl_vader_reserve_fbox (int peer_smp_rank, size_t size)
{
int next_fbox = mca_btl_vader_component.vader_next_fbox_out[peer_smp_rank];
unsigned char *fbox = MCA_BTL_VADER_FBOX_OUT_PTR(peer_smp_rank, next_fbox);
/* todo -- need thread locks here for the multi-threaded case */
if (size > MAX_MSG || fbox[0] != MCA_BTL_VADER_FBOX_FREE) {
/* fall back on fifo */
return NULL;
}
/* advance the round-robin write index, wrapping at the last box */
mca_btl_vader_component.vader_next_fbox_out[peer_smp_rank] =
next_fbox == LAST_FBOX ? 0 : next_fbox + 1;
/* mark this fast box as in use (RESERVED is not a valid size, so the
 * receiver's poll loop will not consume the box until the real size
 * is written by mca_btl_vader_fbox_send) */
fbox[0] = MCA_BTL_VADER_FBOX_RESERVED;
/* skip the 2-byte size/tag header */
return fbox + 2;
}
/* Publish a fast-box message after its payload has been written.
 *
 * `fbox` is the data pointer returned by mca_btl_vader_reserve_fbox
 * (2 bytes past the box start), so fbox[-2] is the size/status byte
 * and fbox[-1] is the tag byte. Writing the size last flips the box
 * from RESERVED to a valid size, which is what the receiver polls on
 * in mca_btl_vader_check_fboxes. */
static inline void mca_btl_vader_fbox_send (unsigned char *fbox, unsigned char tag, size_t size)
{
fbox[-1] = tag;
/* ensure data writes have completed before we mark the data as available */
opal_atomic_wmb ();
fbox[-2] = size;
}
/* Attempt an inline (header + optional payload) send through a fast box.
 *
 * Returns 1 if the message was copied and published, 0 if no fast box
 * was available (or the message was too large) and the caller must use
 * the regular send path instead. */
static inline int mca_btl_vader_fbox_sendi (struct mca_btl_base_endpoint_t *endpoint, char tag,
void *header, size_t header_size,
void *payload, size_t payload_size)
{
unsigned char *fbox;
fbox = mca_btl_vader_reserve_fbox(endpoint->peer_smp_rank, header_size + payload_size);
if (NULL == fbox) {
/* no free fast box -- tell the caller to fall back on the fifo */
return 0;
}
memcpy (fbox, header, header_size);
if (OPAL_UNLIKELY(payload)) {
/* inline sends are typically just pml headers (due to MCA_BTL_FLAGS_SEND_INPLACE) */
memcpy (fbox + header_size, payload, payload_size);
}
/* mark the fbox as sent (publishes size/tag after the copies above) */
mca_btl_vader_fbox_send (fbox, tag, header_size + payload_size);
/* send complete */
return 1;
}
/* Poll every peer's incoming fast boxes and deliver any published
 * messages to the registered active-message callbacks.
 *
 * For each peer the boxes are consumed in round-robin order starting
 * at vader_next_fbox_in[peer]; a box is "published" once its status
 * byte holds a real size (anything other than FREE/RESERVED, both of
 * which match the 0xfe mask -- see the fbox constants). */
static inline void mca_btl_vader_check_fboxes (void)
{
int my_smp_rank = mca_btl_vader_component.my_smp_rank;
mca_btl_active_message_callback_t *reg;
/* stack-allocated descriptor pointing straight into the fast box */
mca_btl_vader_frag_t frag;
unsigned char size, tag;
int i;
for (i = 0 ; i < mca_btl_vader_component.num_smp_procs ; ++i) {
int next_fbox = mca_btl_vader_component.vader_next_fbox_in[i];
unsigned char *fbox = MCA_BTL_VADER_FBOX_IN_PTR(i, next_fbox);
if (my_smp_rank == i) {
/* no fast boxes to/from ourselves */
continue;
}
/* process all fast-box messages */
/* loop while the status byte is a valid size (FREE=0xfe and
 * RESERVED=0xff both satisfy (x & 0xfe) == 0xfe; sizes <= 126 never do) */
while (0xfe != ((size = fbox[0]) & 0xfe)) {
/* pair with the sender's wmb: don't read tag/data before the size */
opal_atomic_rmb ();
tag = fbox[1];
reg = mca_btl_base_active_message_trigger + tag;
/* data area starts 2 bytes past the size/tag header */
frag.segment.seg_addr.pval = fbox + 2;
frag.segment.seg_len = size;
frag.base.des_dst = &frag.segment;
frag.base.des_dst_cnt = 1;
reg->cbfunc(&mca_btl_vader.super, tag, &(frag.base), reg->cbdata);
/* hand the box back to the sender */
fbox[0] = MCA_BTL_VADER_FBOX_FREE;
next_fbox = next_fbox == LAST_FBOX ? 0 : next_fbox + 1;
fbox = MCA_BTL_VADER_FBOX_IN_PTR(i, next_fbox);
}
/* remember where to resume polling for this peer */
mca_btl_vader_component.vader_next_fbox_in[i] = next_fbox;
}
}
#endif /* !defined(MCA_BTL_VADER_FBOX_H) */

Просмотреть файл

@ -29,7 +29,6 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag)
{
frag->hdr = (mca_btl_vader_hdr_t*)frag->base.super.ptr;
if(frag->hdr != NULL) {
frag->segment.seg_addr.pval = (char *)(frag->hdr + 1);
frag->hdr->my_smp_rank = mca_btl_vader_component.my_smp_rank;
}
}

Просмотреть файл

@ -28,6 +28,7 @@
#define MCA_BTL_VADER_FLAG_INLINE 0
#define MCA_BTL_VADER_FLAG_SINGLE_COPY 1
#define MCA_BTL_VADER_FLAG_FBOX 2
struct mca_btl_vader_hdr_t {
volatile void *next; /* next item in fifo. many peers may touch this */
@ -63,6 +64,7 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_frag_t);
frag = (mca_btl_vader_frag_t *) item; \
frag->hdr->complete = false; \
frag->hdr->flags = MCA_BTL_VADER_FLAG_INLINE; \
frag->segment.seg_addr.pval = (char *)(frag->hdr + 1); \
frag->my_list = &mca_btl_vader_component.vader_frags_eager; \
} while (0)
@ -73,6 +75,7 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_frag_t);
frag = (mca_btl_vader_frag_t *) item; \
frag->hdr->complete = false; \
frag->hdr->flags = MCA_BTL_VADER_FLAG_INLINE; \
frag->segment.seg_addr.pval = (char *)(frag->hdr + 1); \
frag->my_list = &mca_btl_vader_component.vader_frags_user; \
} while (0)

Просмотреть файл

@ -42,12 +42,7 @@ int mca_btl_vader_get (struct mca_btl_base_module_t *btl,
rem_ptr = vader_reg_to_ptr (reg, (void *) src->seg_key.ptr);
if (OPAL_LIKELY((uintptr_t)rem_ptr != src->seg_key.ptr) &&
src->seg_len >= mca_btl_vader_memcpy_limit) {
memcpy ((void *) dst->seg_key.ptr, rem_ptr, size);
} else {
memmove ((void *) dst->seg_key.ptr, rem_ptr, size);
}
vader_memmove ((void *) dst->seg_key.ptr, rem_ptr, size);
vader_return_registration (reg, endpoint->peer_smp_rank);

Просмотреть файл

@ -42,14 +42,7 @@ int mca_btl_vader_put (struct mca_btl_base_module_t *btl,
rem_ptr = vader_reg_to_ptr (reg, (void *) dst->seg_key.ptr);
if (OPAL_LIKELY((uintptr_t)rem_ptr != dst->seg_key.ptr) &&
dst->seg_len >= mca_btl_vader_memcpy_limit) {
/* memcpy is faster at certain sizes but is undefined if the
pointers are aliased */
memcpy (rem_ptr, (void *) src->seg_key.ptr, size);
} else {
memmove (rem_ptr, (void *) src->seg_key.ptr, size);
}
vader_memmove (rem_ptr, (void *) src->seg_key.ptr, size);
vader_return_registration (reg, endpoint->peer_smp_rank);

Просмотреть файл

@ -26,6 +26,7 @@
#include "btl_vader.h"
#include "btl_vader_frag.h"
#include "btl_vader_fifo.h"
#include "btl_vader_fbox.h"
/**
* Initiate a send to the peer.
@ -40,6 +41,16 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
{
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
if (frag->hdr->flags & MCA_BTL_VADER_FLAG_FBOX) {
mca_btl_vader_fbox_send (frag->segment.seg_addr.pval, tag, frag->segment.seg_len);
if (OPAL_LIKELY(frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
MCA_BTL_VADER_FRAG_RETURN(frag);
}
return 1;
}
/* available header space */
frag->hdr->len = frag->segment.seg_len;
/* type of message, pt-2-pt, one-sided, etc */

Просмотреть файл

@ -27,6 +27,8 @@
#include "btl_vader_frag.h"
#include "btl_vader_fifo.h"
#include "btl_vader_fbox.h"
/**
* Initiate an inline send to the peer.
*
@ -46,7 +48,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
uint32_t iov_count = 1;
struct iovec iov;
size_t max_data;
void *data_ptr;
void *data_ptr = NULL;
assert (length < mca_btl_vader_component.eager_limit);
assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));
@ -54,9 +56,19 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
/* we won't ever return a descriptor */
*descriptor = NULL;
if (OPAL_LIKELY(!(payload_size && opal_convertor_need_buffers (convertor)))) {
if (payload_size) {
opal_convertor_get_current_pointer (convertor, &data_ptr);
}
if (mca_btl_vader_fbox_sendi (endpoint, tag, header, header_size, data_ptr, payload_size)) {
return OMPI_SUCCESS;
}
}
/* allocate a fragment, giving up if we can't get one */
frag = mca_btl_vader_alloc (btl, endpoint, order, length,
flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, length,
flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if (OPAL_UNLIKELY(NULL == frag)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -85,7 +97,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
} else if (payload_size) {
/* bypassing the convertor may speed things up a little */
opal_convertor_get_current_pointer (convertor, &data_ptr);
memcpy ((uintptr_t)frag->segment.seg_addr.pval + header_size, data_ptr, payload_size);
memcpy ((void *)((uintptr_t)frag->segment.seg_addr.pval + header_size), data_ptr, payload_size);
}
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);