Made btl_vader_max_inline_send a configurable parameter; updated and enabled sendi.
This commit was SVN r25374.
Этот коммит содержится в:
родитель
033179d6ac
Коммит
82efe131dc
@ -31,8 +31,6 @@
|
||||
#include "btl_vader_endpoint.h"
|
||||
#include "btl_vader_fifo.h"
|
||||
|
||||
int mca_btl_vader_max_inline_send = 256;
|
||||
|
||||
static int vader_del_procs (struct mca_btl_base_module_t *btl,
|
||||
size_t nprocs, struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers);
|
||||
@ -42,10 +40,6 @@ static int vader_register_error_cb (struct mca_btl_base_module_t* btl,
|
||||
|
||||
static int vader_finalize (struct mca_btl_base_module_t* btl);
|
||||
|
||||
static mca_btl_base_descriptor_t* vader_alloc (struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags);
|
||||
|
||||
static int vader_free (struct mca_btl_base_module_t* btl, mca_btl_base_descriptor_t* des);
|
||||
|
||||
static struct mca_btl_base_descriptor_t *vader_prepare_src (
|
||||
@ -93,12 +87,12 @@ mca_btl_vader_t mca_btl_vader = {
|
||||
vader_del_procs,
|
||||
NULL, /* btl_register */
|
||||
vader_finalize,
|
||||
vader_alloc,
|
||||
mca_btl_vader_alloc,
|
||||
vader_free,
|
||||
vader_prepare_src,
|
||||
vader_prepare_dst,
|
||||
mca_btl_vader_send,
|
||||
NULL, /* btl_sendi is implemented but not used at the moment */
|
||||
mca_btl_vader_sendi,
|
||||
mca_btl_vader_put,
|
||||
mca_btl_vader_get,
|
||||
mca_btl_base_dump,
|
||||
@ -562,9 +556,9 @@ static int vader_register_error_cb(struct mca_btl_base_module_t* btl,
|
||||
* @param btl (IN) BTL module
|
||||
* @param size (IN) Request segment size.
|
||||
*/
|
||||
static mca_btl_base_descriptor_t *vader_alloc(struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags)
|
||||
mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags)
|
||||
{
|
||||
mca_btl_vader_frag_t *frag = NULL;
|
||||
int rc;
|
||||
@ -576,14 +570,14 @@ static mca_btl_base_descriptor_t *vader_alloc(struct mca_btl_base_module_t *btl,
|
||||
}
|
||||
|
||||
if (OPAL_LIKELY(frag != NULL)) {
|
||||
frag->segment.seg_len = size;
|
||||
frag->endpoint = endpoint;
|
||||
frag->segment.seg_len = size;
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
frag->base.des_flags = flags;
|
||||
frag->base.order = order;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_flags = flags;
|
||||
frag->base.order = order;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
}
|
||||
|
||||
@ -681,12 +675,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((*size + reserve) < mca_btl_vader_max_inline_send) {
|
||||
/* inline send */
|
||||
/* NTH: the convertor adds some latency so we bypass it here */
|
||||
memmove ((void *)((uintptr_t)frag->segment.seg_addr.pval + reserve), data_ptr, *size);
|
||||
frag->segment.seg_len = reserve + *size;
|
||||
} else {
|
||||
if ((*size + reserve) > mca_btl_vader_max_inline_send) {
|
||||
/* single copy send */
|
||||
/* pack the iovec after the reserved memory */
|
||||
lcl_mem = (struct iovec *) ((uintptr_t)frag->segment.seg_addr.pval + reserve);
|
||||
@ -697,6 +686,11 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
lcl_mem->iov_len = *size;
|
||||
|
||||
frag->segment.seg_len = reserve;
|
||||
} else {
|
||||
/* inline send */
|
||||
/* NTH: the convertor adds some latency so we bypass it here */
|
||||
memmove ((void *)((uintptr_t)frag->segment.seg_addr.pval + reserve), data_ptr, *size);
|
||||
frag->segment.seg_len = reserve + *size;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -707,7 +701,7 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
|
||||
}
|
||||
|
||||
frag->segment.seg_key.ptr = (uintptr_t) data_ptr;
|
||||
frag->segment.seg_len = *size;
|
||||
frag->segment.seg_len = reserve + *size;
|
||||
}
|
||||
|
||||
frag->base.des_src = &frag->segment;
|
||||
|
@ -64,6 +64,7 @@ BEGIN_C_DECLS
|
||||
|
||||
extern int mca_btl_vader_memcpy_limit;
|
||||
extern int mca_btl_vader_segment_multiple;
|
||||
extern int mca_btl_vader_max_inline_send;
|
||||
|
||||
#define VADER_FIFO_FREE (void *) (-2)
|
||||
/* We can't use opal_cache_line_size here because we need a
|
||||
@ -267,7 +268,7 @@ int mca_btl_vader_send(struct mca_btl_base_module_t *btl,
|
||||
mca_btl_base_tag_t tag);
|
||||
|
||||
/**
|
||||
* Initiate an inline send to the peer. Return a descriptor on failure.
|
||||
* Initiate an inline send to the peer.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param peer (IN) BTL peer addressing
|
||||
@ -275,12 +276,9 @@ int mca_btl_vader_send(struct mca_btl_base_module_t *btl,
|
||||
int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct opal_convertor_t *convertor,
|
||||
void *header,
|
||||
size_t header_size,
|
||||
size_t payload_size,
|
||||
uint8_t order,
|
||||
uint32_t flags,
|
||||
mca_btl_base_tag_t tag,
|
||||
void *header, size_t header_size,
|
||||
size_t payload_size, uint8_t order,
|
||||
uint32_t flags, mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t **descriptor);
|
||||
|
||||
/**
|
||||
@ -305,6 +303,17 @@ int mca_btl_vader_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des);
|
||||
|
||||
/**
|
||||
* Allocate a segment.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param size (IN) Request segment size.
|
||||
*/
|
||||
mca_btl_base_descriptor_t* mca_btl_vader_alloc (struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -47,6 +47,8 @@ static mca_btl_base_module_t** mca_btl_vader_component_init(int *num_btls,
|
||||
/* limit where we should switch from bcopy to memcpy */
|
||||
int mca_btl_vader_memcpy_limit = 524288;
|
||||
int mca_btl_vader_segment_multiple = 4194304;
|
||||
/* maximum size for using copy-in-copy out semantics for contiguous sends */
|
||||
int mca_btl_vader_max_inline_send = 256;
|
||||
|
||||
/*
|
||||
* Shared Memory (VADER) component instance.
|
||||
@ -127,6 +129,8 @@ static int mca_btl_vader_component_register (void)
|
||||
mca_btl_vader_param_register_int("memcpy_limit", mca_btl_vader_memcpy_limit);
|
||||
mca_btl_vader_segment_multiple =
|
||||
msb(mca_btl_vader_param_register_int("segment_multiple", mca_btl_vader_segment_multiple));
|
||||
mca_btl_vader_max_inline_send =
|
||||
mca_btl_vader_param_register_int("max_inline_send", mca_btl_vader_max_inline_send);
|
||||
|
||||
mca_btl_vader.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
|
||||
mca_btl_vader.super.btl_eager_limit = 64 * 1024;
|
||||
|
@ -63,7 +63,6 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_frag_t);
|
||||
frag = (mca_btl_vader_frag_t *) item; \
|
||||
frag->hdr->complete = false; \
|
||||
frag->hdr->flags = MCA_BTL_VADER_FLAG_INLINE; \
|
||||
frag->hdr->len = 0; \
|
||||
frag->my_list = &mca_btl_vader_component.vader_frags_eager; \
|
||||
} while (0)
|
||||
|
||||
@ -74,7 +73,6 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_frag_t);
|
||||
frag = (mca_btl_vader_frag_t *) item; \
|
||||
frag->hdr->complete = false; \
|
||||
frag->hdr->flags = MCA_BTL_VADER_FLAG_INLINE; \
|
||||
frag->hdr->len = 0; \
|
||||
frag->my_list = &mca_btl_vader_component.vader_frags_user; \
|
||||
} while (0)
|
||||
|
||||
|
@ -46,36 +46,35 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
uint32_t iov_count = 1;
|
||||
struct iovec iov;
|
||||
size_t max_data;
|
||||
int rc;
|
||||
void *data_ptr;
|
||||
|
||||
assert (length < mca_btl_vader_component.eager_limit);
|
||||
assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));
|
||||
|
||||
/* we won't ever return a descriptor */
|
||||
*descriptor = NULL;
|
||||
|
||||
/* allocate a fragment, giving up if we can't get one */
|
||||
/* note that frag==NULL is equivalent to rc returning an error code */
|
||||
MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, rc);
|
||||
frag = mca_btl_vader_alloc (btl, endpoint, order, length,
|
||||
flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
*descriptor = NULL;
|
||||
return rc;
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* fill in fragment fields */
|
||||
frag->segment.seg_len = length;
|
||||
frag->hdr->len = length;
|
||||
|
||||
/* why do any flags matter here other than OWNERSHIP? */
|
||||
frag->base.des_flags = flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
||||
frag->hdr->len = length;
|
||||
frag->hdr->tag = tag;
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
/* write the match header (with MPI comm/tag/etc. info) */
|
||||
memmove (frag->segment.seg_addr.pval, header, header_size);
|
||||
memcpy (frag->segment.seg_addr.pval, header, header_size);
|
||||
|
||||
/* write the message data if there is any */
|
||||
/*
|
||||
We can add MEMCHECKER calls before and after the packing.
|
||||
*/
|
||||
if (payload_size) {
|
||||
/* we can't use single-copy semantics here since the caller will consider the send
|
||||
complete if we return success */
|
||||
if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) {
|
||||
/* pack the data into the supplied buffer */
|
||||
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segment.seg_addr.pval + header_size);
|
||||
iov.iov_len = max_data = payload_size;
|
||||
@ -83,6 +82,10 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
(void) opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
|
||||
|
||||
assert (max_data == payload_size);
|
||||
} else if (payload_size) {
|
||||
/* bypassing the convertor may speed things up a little */
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
memcpy ((uintptr_t)frag->segment.seg_addr.pval + header_size, data_ptr, payload_size);
|
||||
}
|
||||
|
||||
opal_list_append (&mca_btl_vader_component.active_sends, (opal_list_item_t *) frag);
|
||||
@ -91,5 +94,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
|
||||
vader_fifo_write ((void *) VIRTUAL2RELATIVE(frag->hdr),
|
||||
mca_btl_vader_component.fifo[endpoint->peer_smp_rank]);
|
||||
|
||||
/* the progress function will return the fragment */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user