1
1

btl/vader: update for BTL 3.0 interface

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-01-05 16:02:41 -07:00 коммит произвёл Nathan Hjelm
родитель f8ac3fb1e8
Коммит aba0675fe7
12 изменённых файлов: 276 добавлений и 351 удалений

Просмотреть файл

@ -12,8 +12,8 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2014 Los Alamos National Security, LLC. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
* All rights reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -98,7 +98,7 @@ enum {
* Shared Memory (VADER) BTL module. * Shared Memory (VADER) BTL module.
*/ */
struct mca_btl_vader_component_t { struct mca_btl_vader_component_t {
mca_btl_base_component_2_0_0_t super; /**< base BTL component */ mca_btl_base_component_3_0_0_t super; /**< base BTL component */
int vader_free_list_num; /**< initial size of free lists */ int vader_free_list_num; /**< initial size of free lists */
int vader_free_list_max; /**< maximum size of free lists */ int vader_free_list_max; /**< maximum size of free lists */
int vader_free_list_inc; /**< number of elements to alloc when growing free lists */ int vader_free_list_inc; /**< number of elements to alloc when growing free lists */
@ -115,7 +115,6 @@ struct mca_btl_vader_component_t {
ompi_free_list_t vader_frags_eager; /**< free list of vader send frags */ ompi_free_list_t vader_frags_eager; /**< free list of vader send frags */
ompi_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */ ompi_free_list_t vader_frags_max_send; /**< free list of vader max send frags (large fragments) */
ompi_free_list_t vader_frags_user; /**< free list of small inline frags */ ompi_free_list_t vader_frags_user; /**< free list of small inline frags */
ompi_free_list_t vader_frags_rdma; /**< free list of vader put/get frags (single-copy) */
unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */ unsigned int fbox_threshold; /**< number of sends required before we setup a send fast box for a peer */
unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */ unsigned int fbox_max; /**< maximum number of send fast boxes to allocate */
@ -208,21 +207,24 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
* @param descriptor (IN) Description of the data to be transferred * @param descriptor (IN) Description of the data to be transferred
*/ */
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
int mca_btl_vader_put_xpmem (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
#if OPAL_BTL_VADER_HAVE_CMA #if OPAL_BTL_VADER_HAVE_CMA
int mca_btl_vader_put_cma (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
/** /**
@ -233,21 +235,24 @@ int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl,
* @param descriptor (IN) Description of the data to be transferred * @param descriptor (IN) Description of the data to be transferred
*/ */
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
int mca_btl_vader_get_xpmem (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
#if OPAL_BTL_VADER_HAVE_CMA #if OPAL_BTL_VADER_HAVE_CMA
int mca_btl_vader_get_cma (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int mca_btl_vader_get_knem (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des); mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
#endif #endif
/** /**
@ -260,6 +265,7 @@ mca_btl_base_descriptor_t* mca_btl_vader_alloc (struct mca_btl_base_module_t* bt
struct mca_btl_base_endpoint_t* endpoint, struct mca_btl_base_endpoint_t* endpoint,
uint8_t order, size_t size, uint32_t flags); uint8_t order, size_t size, uint32_t flags);
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2014 Los Alamos National Security, LLC. * Copyright (c) 2010-2015 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved.
@ -227,12 +227,12 @@ static int mca_btl_vader_component_register (void)
mca_btl_vader.super.btl_eager_limit = 32 * 1024; mca_btl_vader.super.btl_eager_limit = 32 * 1024;
mca_btl_vader.super.btl_rndv_eager_limit = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rndv_eager_limit = mca_btl_vader.super.btl_eager_limit;
mca_btl_vader.super.btl_max_send_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_max_send_size = mca_btl_vader.super.btl_eager_limit;
mca_btl_vader.super.btl_min_rdma_pipeline_size = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_min_rdma_pipeline_size = INT_MAX;
} else { } else {
mca_btl_vader.super.btl_eager_limit = 4 * 1024; mca_btl_vader.super.btl_eager_limit = 4 * 1024;
mca_btl_vader.super.btl_rndv_eager_limit = 32 * 1024; mca_btl_vader.super.btl_rndv_eager_limit = 32 * 1024;
mca_btl_vader.super.btl_max_send_size = 32 * 1024; mca_btl_vader.super.btl_max_send_size = 32 * 1024;
mca_btl_vader.super.btl_min_rdma_pipeline_size = 32 * 1024; mca_btl_vader.super.btl_min_rdma_pipeline_size = INT_MAX;
} }
mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit; mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
@ -251,7 +251,6 @@ static int mca_btl_vader_component_register (void)
mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */ mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */
} }
mca_btl_vader.super.btl_seg_size = sizeof (mca_btl_vader_segment_t);
mca_btl_vader.super.btl_latency = 1; /* Microsecs */ mca_btl_vader.super.btl_latency = 1; /* Microsecs */
/* Call the BTL base to register its MCA params */ /* Call the BTL base to register its MCA params */
@ -272,7 +271,6 @@ static int mca_btl_vader_component_open(void)
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, ompi_free_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_rdma, ompi_free_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t);
OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t); OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t);
@ -293,7 +291,6 @@ static int mca_btl_vader_component_close(void)
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send); OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send);
OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_rdma);
OBJ_DESTRUCT(&mca_btl_vader_component.lock); OBJ_DESTRUCT(&mca_btl_vader_component.lock);
OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints); OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints);
OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments); OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments);
@ -349,12 +346,11 @@ static void mca_btl_vader_select_next_single_copy_mechanism (void)
static void mca_btl_vader_check_single_copy (void) static void mca_btl_vader_check_single_copy (void)
{ {
int initial_mechanism = mca_btl_vader_component.single_copy_mechanism; int initial_mechanism = mca_btl_vader_component.single_copy_mechanism;
int rc;
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) { if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
/* try to create an xpmem segment for the entire address space */ /* try to create an xpmem segment for the entire address space */
rc = mca_btl_vader_xpmem_init (); int rc = mca_btl_vader_xpmem_init ();
if (OPAL_SUCCESS != rc) { if (OPAL_SUCCESS != rc) {
if (MCA_BTL_VADER_XPMEM == initial_mechanism) { if (MCA_BTL_VADER_XPMEM == initial_mechanism) {
opal_show_help("help-btl-vader.txt", "xpmem-make-failed", opal_show_help("help-btl-vader.txt", "xpmem-make-failed",
@ -414,7 +410,7 @@ static void mca_btl_vader_check_single_copy (void)
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
if (MCA_BTL_VADER_KNEM == mca_btl_vader_component.single_copy_mechanism) { if (MCA_BTL_VADER_KNEM == mca_btl_vader_component.single_copy_mechanism) {
/* mca_btl_vader_knem_init will set the appropriate get/put functions */ /* mca_btl_vader_knem_init will set the appropriate get/put functions */
rc = mca_btl_vader_knem_init (); int rc = mca_btl_vader_knem_init ();
if (OPAL_SUCCESS != rc) { if (OPAL_SUCCESS != rc) {
if (MCA_BTL_VADER_KNEM == initial_mechanism) { if (MCA_BTL_VADER_KNEM == initial_mechanism) {
opal_show_help("help-btl-vader.txt", "knem requested but not available", opal_show_help("help-btl-vader.txt", "knem requested but not available",
@ -559,7 +555,7 @@ failed:
void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *endpoint) void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *endpoint)
{ {
mca_btl_base_segment_t segments[2]; mca_btl_base_segment_t segments[2];
mca_btl_base_descriptor_t frag = {.des_local = segments, .des_local_count = 1}; mca_btl_base_descriptor_t frag = {.des_segments = segments, .des_segment_count = 1};
const mca_btl_active_message_callback_t *reg; const mca_btl_active_message_callback_t *reg;
if (hdr->flags & MCA_BTL_VADER_FLAG_COMPLETE) { if (hdr->flags & MCA_BTL_VADER_FLAG_COMPLETE) {
@ -579,7 +575,7 @@ void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_ba
&segments[1].seg_addr.pval); &segments[1].seg_addr.pval);
segments[1].seg_len = hdr->sc_iov.iov_len; segments[1].seg_len = hdr->sc_iov.iov_len;
frag.des_local_count = 2; frag.des_segment_count = 2;
/* recv upcall */ /* recv upcall */
reg->cbfunc(&mca_btl_vader.super, hdr->tag, &frag, reg->cbdata); reg->cbfunc(&mca_btl_vader.super, hdr->tag, &frag, reg->cbdata);

Просмотреть файл

@ -204,7 +204,7 @@ static inline bool mca_btl_vader_check_fboxes (void)
/* the 0xff tag indicates we should skip the rest of the buffer */ /* the 0xff tag indicates we should skip the rest of the buffer */
if (OPAL_LIKELY((0xfe & hdr.data.tag) != 0xfe)) { if (OPAL_LIKELY((0xfe & hdr.data.tag) != 0xfe)) {
mca_btl_base_segment_t segment; mca_btl_base_segment_t segment;
mca_btl_base_descriptor_t desc = {.des_local = &segment, .des_local_count = 1}; mca_btl_base_descriptor_t desc = {.des_segments = &segment, .des_segment_count = 1};
const mca_btl_active_message_callback_t *reg = const mca_btl_active_message_callback_t *reg =
mca_btl_base_active_message_trigger + hdr.data.tag; mca_btl_base_active_message_trigger + hdr.data.tag;

Просмотреть файл

@ -31,11 +31,11 @@ static inline void mca_btl_vader_frag_constructor (mca_btl_vader_frag_t *frag)
if(frag->hdr != NULL) { if(frag->hdr != NULL) {
frag->hdr->frag = frag; frag->hdr->frag = frag;
frag->hdr->flags = 0; frag->hdr->flags = 0;
frag->segments[0].base.seg_addr.pval = (char *)(frag->hdr + 1); frag->segments[0].seg_addr.pval = (char *)(frag->hdr + 1);
} }
frag->base.des_local = &frag->segments->base; frag->base.des_segments = frag->segments;
frag->base.des_local_count = 1; frag->base.des_segment_count = 1;
frag->fbox = NULL; frag->fbox = NULL;
} }
@ -65,8 +65,6 @@ void mca_btl_vader_frag_init (ompi_free_list_item_t *item, void *ctx)
frag->my_list = &mca_btl_vader_component.vader_frags_eager; frag->my_list = &mca_btl_vader_component.vader_frags_eager;
} else if (mca_btl_vader.super.btl_max_send_size == data_size) { } else if (mca_btl_vader.super.btl_max_send_size == data_size) {
frag->my_list = &mca_btl_vader_component.vader_frags_max_send; frag->my_list = &mca_btl_vader_component.vader_frags_max_send;
} else {
frag->my_list = &mca_btl_vader_component.vader_frags_rdma;
} }
if (data_size) { if (data_size) {

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -57,15 +57,6 @@ struct mca_btl_vader_hdr_t {
}; };
typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t; typedef struct mca_btl_vader_hdr_t mca_btl_vader_hdr_t;
struct mca_btl_vader_segment_t {
mca_btl_base_segment_t base;
#if OPAL_BTL_VADER_HAVE_KNEM
uint64_t cookie;
intptr_t registered_base;
#endif
};
typedef struct mca_btl_vader_segment_t mca_btl_vader_segment_t;
/** /**
* shared memory send fragment derived type. * shared memory send fragment derived type.
*/ */
@ -73,7 +64,7 @@ struct mca_btl_vader_frag_t {
/** base object */ /** base object */
mca_btl_base_descriptor_t base; mca_btl_base_descriptor_t base;
/** storage for segment data (max 2) */ /** storage for segment data (max 2) */
mca_btl_vader_segment_t segments[2]; mca_btl_base_segment_t segments[2];
/** endpoint this fragment is active on */ /** endpoint this fragment is active on */
struct mca_btl_base_endpoint_t *endpoint; struct mca_btl_base_endpoint_t *endpoint;
/** fast box in use (or NULL) */ /** fast box in use (or NULL) */
@ -82,9 +73,6 @@ struct mca_btl_vader_frag_t {
mca_btl_vader_hdr_t *hdr; mca_btl_vader_hdr_t *hdr;
/** free list this fragment was allocated within */ /** free list this fragment was allocated within */
ompi_free_list_t *my_list; ompi_free_list_t *my_list;
#if OPAL_BTL_VADER_HAVE_KNEM
uint64_t cookie;
#endif
}; };
typedef struct mca_btl_vader_frag_t mca_btl_vader_frag_t; typedef struct mca_btl_vader_frag_t mca_btl_vader_frag_t;
@ -108,37 +96,16 @@ static inline int mca_btl_vader_frag_alloc (mca_btl_vader_frag_t **frag, ompi_fr
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
static inline int mca_btl_vader_frag_alloc_rdma (mca_btl_vader_frag_t **frag, ompi_free_list_t *list,
struct mca_btl_base_endpoint_t *endpoint) {
ompi_free_list_item_t *item;
OMPI_FREE_LIST_GET_MT(list, item);
*frag = (mca_btl_vader_frag_t *) item;
if (OPAL_LIKELY(NULL != item)) {
(*frag)->endpoint = endpoint;
}
return OPAL_SUCCESS;
}
static inline void mca_btl_vader_frag_return (mca_btl_vader_frag_t *frag) static inline void mca_btl_vader_frag_return (mca_btl_vader_frag_t *frag)
{ {
if (frag->hdr) { if (frag->hdr) {
frag->hdr->flags = 0; frag->hdr->flags = 0;
} }
frag->segments[0].base.seg_addr.pval = (char *)(frag->hdr + 1); frag->segments[0].seg_addr.pval = (char *)(frag->hdr + 1);
frag->base.des_local_count = 1; frag->base.des_segment_count = 1;
frag->fbox = NULL; frag->fbox = NULL;
#if OPAL_BTL_VADER_HAVE_KNEM
if (frag->cookie) {
/* NTH: explicity ignore the return code. Don't care about this cookie anymore anyway. */
(void) ioctl(mca_btl_vader.knem_fd, KNEM_CMD_DESTROY_REGION, &frag->cookie);
frag->cookie = 0;
}
#endif
OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t *)frag); OMPI_FREE_LIST_RETURN_MT(frag->my_list, (ompi_free_list_item_t *)frag);
} }
@ -153,9 +120,6 @@ OBJ_CLASS_DECLARATION(mca_btl_vader_frag_t);
#define MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint) \ #define MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint) \
mca_btl_vader_frag_alloc (&(frag), &mca_btl_vader_component.vader_frags_user, endpoint) mca_btl_vader_frag_alloc (&(frag), &mca_btl_vader_component.vader_frags_user, endpoint)
#define MCA_BTL_VADER_FRAG_ALLOC_RDMA(frag, endpoint) \
mca_btl_vader_frag_alloc_rdma (&(frag), &mca_btl_vader_component.vader_frags_rdma, endpoint)
#define MCA_BTL_VADER_FRAG_RETURN(frag) mca_btl_vader_frag_return(frag) #define MCA_BTL_VADER_FRAG_RETURN(frag) mca_btl_vader_frag_return(frag)

Просмотреть файл

@ -33,47 +33,42 @@
* @param descriptor (IN) Description of the data to be transferred * @param descriptor (IN) Description of the data to be transferred
*/ */
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
int mca_btl_vader_get_xpmem (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
mca_btl_base_segment_t *src = des->des_remote;
mca_btl_base_segment_t *dst = des->des_local;
const size_t size = min(dst->seg_len, src->seg_len);
mca_mpool_base_registration_t *reg; mca_mpool_base_registration_t *reg;
void *rem_ptr; void *rem_ptr;
reg = vader_get_registation (endpoint, src->seg_addr.pval, src->seg_len, 0, &rem_ptr); /* silence warning about unused arguments */
(void) local_handle;
(void) remote_handle;
reg = vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &rem_ptr);
if (OPAL_UNLIKELY(NULL == rem_ptr)) { if (OPAL_UNLIKELY(NULL == rem_ptr)) {
return OPAL_ERROR; return OPAL_ERROR;
} }
vader_memmove (dst->seg_addr.pval, rem_ptr, size); vader_memmove (local_address, rem_ptr, size);
vader_return_registration (reg, endpoint); vader_return_registration (reg, endpoint);
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#endif #endif
#if OPAL_BTL_VADER_HAVE_CMA #if OPAL_BTL_VADER_HAVE_CMA
int mca_btl_vader_get_cma (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des; struct iovec src_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size};
mca_btl_base_segment_t *src = des->des_remote; struct iovec dst_iov = {.iov_base = local_address, .iov_len = size};
mca_btl_base_segment_t *dst = des->des_local;
const size_t size = min(dst->seg_len, src->seg_len);
struct iovec src_iov = {.iov_base = src->seg_addr.pval, .iov_len = size};
struct iovec dst_iov = {.iov_base = dst->seg_addr.pval, .iov_len = size};
ssize_t ret; ssize_t ret;
ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0); ret = process_vm_readv (endpoint->segment_data.other.seg_ds->seg_cpid, &dst_iov, 1, &src_iov, 1, 0);
@ -83,36 +78,29 @@ int mca_btl_vader_get_cma (struct mca_btl_base_module_t *btl,
} }
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#endif #endif
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int mca_btl_vader_get_knem (struct mca_btl_base_module_t *btl, int mca_btl_vader_get_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
mca_btl_vader_segment_t *src = (mca_btl_vader_segment_t *) des->des_remote;
mca_btl_vader_segment_t *dst = (mca_btl_vader_segment_t *) des->des_local;
const size_t size = min(dst->base.seg_len, src->base.seg_len);
intptr_t offset = src->base.seg_addr.lval - src->registered_base;
struct knem_cmd_param_iovec recv_iovec; struct knem_cmd_param_iovec recv_iovec;
struct knem_cmd_inline_copy icopy; struct knem_cmd_inline_copy icopy;
/* Fill in the ioctl data fields. There's no async completion, so /* Fill in the ioctl data fields. There's no async completion, so
we don't need to worry about getting a slot, etc. */ we don't need to worry about getting a slot, etc. */
recv_iovec.base = (uintptr_t) dst->base.seg_addr.lval; recv_iovec.base = (uintptr_t) local_address;
recv_iovec.len = size; recv_iovec.len = size;
icopy.local_iovec_array = (uintptr_t) &recv_iovec; icopy.local_iovec_array = (uintptr_t) &recv_iovec;
icopy.local_iovec_nr = 1; icopy.local_iovec_nr = 1;
icopy.remote_cookie = src->cookie; icopy.remote_cookie = remote_handle->cookie;
icopy.remote_offset = offset; icopy.remote_offset = remote_address - remote_handle->base_addr;
icopy.write = 0; icopy.write = 0;
icopy.flags = 0; icopy.flags = 0;
@ -120,7 +108,7 @@ int mca_btl_vader_get_knem (struct mca_btl_base_module_t *btl,
* is greater than the cutoff. Note that if DMA is not supported * is greater than the cutoff. Note that if DMA is not supported
* or the user specified 0 for knem_dma_min the knem_dma_min was * or the user specified 0 for knem_dma_min the knem_dma_min was
* set to UINT_MAX in mca_btl_vader_knem_init. */ * set to UINT_MAX in mca_btl_vader_knem_init. */
if (mca_btl_vader_component.knem_dma_min <= dst->base.seg_len) { if (mca_btl_vader_component.knem_dma_min <= size) {
icopy.flags = KNEM_FLAG_DMA; icopy.flags = KNEM_FLAG_DMA;
} }
/* synchronous flags only, no need to specify icopy.async_status_index */ /* synchronous flags only, no need to specify icopy.async_status_index */
@ -136,10 +124,7 @@ int mca_btl_vader_get_knem (struct mca_btl_base_module_t *btl,
} }
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -19,9 +19,83 @@
#include <unistd.h> #include <unistd.h>
#include "opal/util/show_help.h" #include "opal/util/show_help.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h"
/* Class instance for the vader registration handle, declared with
 * mca_mpool_base_registration_t as its parent.
 * NOTE(review): the two NULL arguments are presumably the constructor and
 * destructor slots of OBJ_CLASS_INSTANCE -- confirm against the OPAL object
 * system header. */
OBJ_CLASS_INSTANCE(mca_btl_vader_registration_handle_t, mca_mpool_base_registration_t, NULL, NULL);
/**
 * Memory registration callback (installed as .register_mem in
 * mca_btl_vader_knem_init). Creates a knem region covering the buffer and
 * stores the resulting cookie and the buffer's base address in the BTL
 * handle embedded in the registration.
 *
 * @param reg_data (IN)     not used by this function
 * @param base (IN)         start of the buffer to register
 * @param size (IN)         length of the buffer in bytes
 * @param reg (IN/OUT)      registration object; accessed as a
 *                          mca_btl_vader_registration_handle_t
 *
 * @returns OPAL_SUCCESS on success
 * @returns OPAL_ERROR if the KNEM_CMD_CREATE_REGION ioctl fails
 */
static int mca_btl_vader_knem_reg (void *reg_data, void *base, size_t size,
                                   mca_mpool_base_registration_t *reg)
{
    mca_btl_vader_registration_handle_t *handle = (mca_btl_vader_registration_handle_t *) reg;
    struct knem_cmd_param_iovec iov;
    struct knem_cmd_create_region region;
    int rc;

    /* a single iovec entry describes the whole buffer */
    iov.len = size;
    iov.base = (uintptr_t) base;

    region.iovec_nr = 1;
    region.iovec_array = (uintptr_t) &iov;

    /* Vader will explicitly destroy this cookie */
    region.flags = 0;

    /* TODO -- set proper access flags when the protection is passed down */
    region.protection = PROT_READ | PROT_WRITE;

    rc = ioctl (mca_btl_vader.knem_fd, KNEM_CMD_CREATE_REGION, &region);
    if (OPAL_UNLIKELY(rc < 0)) {
        return OPAL_ERROR;
    }

    /* remember what the get/put paths need: the cookie and the region start */
    handle->btl_handle.base_addr = (intptr_t) base;
    handle->btl_handle.cookie = region.cookie;

    return OPAL_SUCCESS;
}
/**
 * Memory deregistration callback (installed as .deregister_mem in
 * mca_btl_vader_knem_init). Asks knem to destroy the region identified by
 * the cookie stored in the registration's BTL handle.
 *
 * @param reg_data (IN)  not used by this function
 * @param reg (IN)       registration object; accessed as a
 *                       mca_btl_vader_registration_handle_t
 *
 * @returns OPAL_SUCCESS always; the ioctl result is deliberately discarded
 */
static int mca_btl_vader_knem_dereg (void *reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_vader_registration_handle_t *handle = (mca_btl_vader_registration_handle_t *) reg;
    uint64_t *cookie = &handle->btl_handle.cookie;

    /* NTH: explicitly ignore the return code. Don't care about this cookie anymore anyway. */
    (void) ioctl (mca_btl_vader.knem_fd, KNEM_CMD_DESTROY_REGION, cookie);

    return OPAL_SUCCESS;
}
static mca_btl_base_registration_handle_t *
mca_btl_vader_register_mem_knem (struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags)
{
mca_btl_vader_registration_handle_t *reg = NULL;
int rc;
rc = btl->btl_mpool->mpool_register (btl->btl_mpool, base, size, 0,
(mca_mpool_base_registration_t **) &reg);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return NULL;
}
return &reg->btl_handle;
}
static int
mca_btl_vader_deregister_mem_knem (struct mca_btl_base_module_t *btl, struct mca_btl_base_registration_handle_t *handle)
{
mca_btl_vader_registration_handle_t *reg =
(mca_btl_vader_registration_handle_t *)((intptr_t) handle - offsetof (mca_btl_vader_registration_handle_t, btl_handle));
btl->btl_mpool->mpool_deregister (btl->btl_mpool, &reg->base);
return OPAL_SUCCESS;
}
int mca_btl_vader_knem_init (void) int mca_btl_vader_knem_init (void)
{ {
mca_mpool_base_resources_t mpool_resources = {
.pool_name = "vader", .reg_data = NULL,
.sizeof_reg = sizeof (mca_btl_vader_registration_handle_t),
.register_mem = mca_btl_vader_knem_reg,
.deregister_mem = mca_btl_vader_knem_dereg
};
struct knem_cmd_info knem_info; struct knem_cmd_info knem_info;
int rc; int rc;
@ -74,6 +148,17 @@ int mca_btl_vader_knem_init (void)
mca_btl_vader.super.btl_get = mca_btl_vader_get_knem; mca_btl_vader.super.btl_get = mca_btl_vader_get_knem;
mca_btl_vader.super.btl_put = mca_btl_vader_put_knem; mca_btl_vader.super.btl_put = mca_btl_vader_put_knem;
/* knem requires registration */
mca_btl_vader.super.btl_register_mem = mca_btl_vader_register_mem_knem;
mca_btl_vader.super.btl_deregister_mem = mca_btl_vader_deregister_mem_knem;
mca_btl_vader.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
mca_btl_vader.super.btl_mpool = mca_mpool_base_module_create ("grdma", NULL,
&mpool_resources);
if (NULL == mca_btl_vader.super.btl_mpool) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
return OPAL_SUCCESS; return OPAL_SUCCESS;
} while (0); } while (0);
@ -89,6 +174,11 @@ int mca_btl_vader_knem_fini (void)
mca_btl_vader.knem_fd = -1; mca_btl_vader.knem_fd = -1;
} }
if (mca_btl_vader.super.btl_mpool) {
(void) mca_mpool_base_module_destroy (mca_btl_vader.super.btl_mpool);
mca_btl_vader.super.btl_mpool = NULL;
}
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/* /*
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -17,6 +17,18 @@
#include <knem_io.h> #include <knem_io.h>
#include <sys/mman.h> #include <sys/mman.h>
/* At this time only knem requires a registration of "RDMA" buffers */
/* Registration handle for knem-registered buffers. Both fields are filled in
 * by mca_btl_vader_knem_reg and read by the knem get path through its
 * remote_handle argument. */
struct mca_btl_base_registration_handle_t {
/* cookie of the knem region, as produced by the KNEM_CMD_CREATE_REGION ioctl */
uint64_t cookie;
/* start address of the registered buffer; the knem get path subtracts it from
 * the remote address to obtain the offset into the region */
intptr_t base_addr;
};
/* Registration object used with the mpool for knem: an mpool registration
 * followed by the BTL-level handle that is returned to callers of the
 * register_mem entry point. */
struct mca_btl_vader_registration_handle_t {
/* mpool registration; the knem callbacks cast a mca_mpool_base_registration_t *
 * to this struct type, which relies on this member being first */
mca_mpool_base_registration_t base;
/* handle handed out to BTL users; the enclosing object is recovered from it
 * with offsetof (..., btl_handle) on deregistration */
mca_btl_base_registration_handle_t btl_handle;
};
typedef struct mca_btl_vader_registration_handle_t mca_btl_vader_registration_handle_t;
int mca_btl_vader_knem_init (void); int mca_btl_vader_knem_init (void);
int mca_btl_vader_knem_fini (void); int mca_btl_vader_knem_fini (void);
int mca_btl_vader_knem_progress (void); int mca_btl_vader_knem_progress (void);

Просмотреть файл

@ -48,7 +48,6 @@ static int vader_free (struct mca_btl_base_module_t* btl, mca_btl_base_descripto
static struct mca_btl_base_descriptor_t *vader_prepare_src ( static struct mca_btl_base_descriptor_t *vader_prepare_src (
struct mca_btl_base_module_t *btl, struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
uint8_t order, uint8_t order,
size_t reserve, size_t reserve,
@ -56,16 +55,6 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (
uint32_t flags uint32_t flags
); );
static struct mca_btl_base_descriptor_t *vader_prepare_dst (
struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor,
uint8_t order,
size_t reserve,
size_t *size,
uint32_t flags);
static int vader_add_procs(struct mca_btl_base_module_t* btl, static int vader_add_procs(struct mca_btl_base_module_t* btl,
size_t nprocs, struct opal_proc_t **procs, size_t nprocs, struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t** peers, struct mca_btl_base_endpoint_t** peers,
@ -82,7 +71,6 @@ mca_btl_vader_t mca_btl_vader = {
.btl_alloc = mca_btl_vader_alloc, .btl_alloc = mca_btl_vader_alloc,
.btl_free = vader_free, .btl_free = vader_free,
.btl_prepare_src = vader_prepare_src, .btl_prepare_src = vader_prepare_src,
.btl_prepare_dst = vader_prepare_dst,
.btl_send = mca_btl_vader_send, .btl_send = mca_btl_vader_send,
.btl_sendi = mca_btl_vader_sendi, .btl_sendi = mca_btl_vader_sendi,
.btl_dump = mca_btl_base_dump, .btl_dump = mca_btl_base_dump,
@ -108,21 +96,6 @@ static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
component->segment_offset = MCA_BTL_VADER_FIFO_SIZE; component->segment_offset = MCA_BTL_VADER_FIFO_SIZE;
/* initialize fragment descriptor free lists */ /* initialize fragment descriptor free lists */
/* initialize free list for single copy (get, put) */
if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
rc = ompi_free_list_init_ex_new (&component->vader_frags_rdma,
sizeof(mca_btl_vader_frag_t), 8,
OBJ_CLASS(mca_btl_vader_frag_t),
0, opal_cache_line_size,
component->vader_free_list_num,
component->vader_free_list_max,
component->vader_free_list_inc,
NULL, mca_btl_vader_frag_init, (void *) 0);
if (OPAL_SUCCESS != rc) {
return rc;
}
}
/* initialize free list for small send and inline fragments */ /* initialize free list for small send and inline fragments */
rc = ompi_free_list_init_ex_new(&component->vader_frags_user, rc = ompi_free_list_init_ex_new(&component->vader_frags_user,
sizeof(mca_btl_vader_frag_t), sizeof(mca_btl_vader_frag_t),
@ -418,7 +391,7 @@ mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl
} }
if (OPAL_LIKELY(frag != NULL)) { if (OPAL_LIKELY(frag != NULL)) {
frag->segments[0].base.seg_len = size; frag->segments[0].seg_len = size;
frag->base.des_flags = flags; frag->base.des_flags = flags;
frag->base.order = order; frag->base.order = order;
@ -440,56 +413,6 @@ static int vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descripto
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags)
{
mca_btl_vader_frag_t *frag;
void *data_ptr;
(void) MCA_BTL_VADER_FRAG_ALLOC_RDMA(frag, endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
opal_convertor_get_current_pointer (convertor, &data_ptr);
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr;
frag->segments[0].base.seg_len = *size;
#if OPAL_BTL_VADER_HAVE_KNEM
if (MCA_BTL_VADER_KNEM == mca_btl_vader_component.single_copy_mechanism) {
struct knem_cmd_create_region knem_cr;
struct knem_cmd_param_iovec knem_iov;
knem_iov.base = (uintptr_t) data_ptr;
knem_iov.len = *size;
knem_cr.iovec_array = (uintptr_t) &knem_iov;
knem_cr.iovec_nr = 1;
knem_cr.protection = PROT_WRITE;
/* Vader will explicitly destroy this cookie */
knem_cr.flags = 0;
if (OPAL_UNLIKELY(ioctl(mca_btl_vader.knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) {
MCA_BTL_VADER_FRAG_RETURN(frag);
return NULL;
}
frag->segments[0].cookie = knem_cr.cookie;
frag->segments[0].registered_base = (intptr_t) data_ptr;
frag->cookie = knem_cr.cookie;
}
#endif /* OPAL_BTL_SM_HAVE_KNEM */
frag->base.order = order;
frag->base.des_flags = flags;
return &frag->base;
}
/** /**
* Pack data * Pack data
* *
@ -497,7 +420,6 @@ struct mca_btl_base_descriptor_t *vader_prepare_dst(struct mca_btl_base_module_t
*/ */
static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl, static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_endpoint_t *endpoint,
mca_mpool_base_registration_t *registration,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size, uint8_t order, size_t reserve, size_t *size,
uint32_t flags) uint32_t flags)
@ -510,118 +432,84 @@ static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_
opal_convertor_get_current_pointer (convertor, &data_ptr); opal_convertor_get_current_pointer (convertor, &data_ptr);
if (OPAL_LIKELY(reserve)) { /* in place send fragment */
/* in place send fragment */ if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) { uint32_t iov_count = 1;
uint32_t iov_count = 1; struct iovec iov;
struct iovec iov;
/* non-contiguous data requires the convertor */ /* non-contiguous data requires the convertor */
if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism && if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism &&
total_size > mca_btl_vader.super.btl_eager_limit) { total_size > mca_btl_vader.super.btl_eager_limit) {
(void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint); (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
} else } else
(void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint); (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
iov.iov_len = *size;
iov.iov_base =
(IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].base.seg_addr.pval)) +
reserve);
rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
if (OPAL_UNLIKELY(rc < 0)) {
MCA_BTL_VADER_FRAG_RETURN(frag);
return NULL;
}
frag->segments[0].base.seg_len = *size + reserve;
} else {
if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
if (OPAL_LIKELY(total_size <= mca_btl_vader.super.btl_eager_limit)) {
(void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);
} else {
(void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
}
} else
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
#if OPAL_BTL_VADER_HAVE_XPMEM
/* use xpmem to send this segment if it is above the max inline send size */
if (OPAL_UNLIKELY(MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism &&
total_size > (size_t) mca_btl_vader_component.max_inline_send)) {
/* single copy send */
frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;
/* set up single copy io vector */
frag->hdr->sc_iov.iov_base = data_ptr;
frag->hdr->sc_iov.iov_len = *size;
frag->segments[0].base.seg_len = reserve;
frag->segments[1].base.seg_len = *size;
frag->segments[1].base.seg_addr.pval = data_ptr;
frag->base.des_local_count = 2;
} else {
#endif
/* inline send */
if (OPAL_LIKELY(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP & flags)) {
/* try to reserve a fast box for this transfer only if the
* fragment does not belong to the caller */
fbox = mca_btl_vader_reserve_fbox (endpoint, total_size);
if (OPAL_LIKELY(fbox)) {
frag->segments[0].base.seg_addr.pval = fbox;
}
frag->fbox = fbox;
}
/* NTH: the covertor adds some latency so we bypass it here */
memcpy ((void *)((uintptr_t)frag->segments[0].base.seg_addr.pval + reserve), data_ptr, *size);
frag->segments[0].base.seg_len = total_size;
#if OPAL_BTL_VADER_HAVE_XPMEM
}
#endif
}
} else {
/* put/get fragment */
(void) MCA_BTL_VADER_FRAG_ALLOC_RDMA(frag, endpoint);
if (OPAL_UNLIKELY(NULL == frag)) { if (OPAL_UNLIKELY(NULL == frag)) {
return NULL; return NULL;
} }
frag->segments[0].base.seg_addr.lval = (uint64_t)(uintptr_t) data_ptr; iov.iov_len = *size;
frag->segments[0].base.seg_len = total_size; iov.iov_base =
#if OPAL_BTL_VADER_HAVE_KNEM (IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
if (MCA_BTL_VADER_KNEM == mca_btl_vader_component.single_copy_mechanism) { reserve);
struct knem_cmd_create_region knem_cr;
struct knem_cmd_param_iovec knem_iov;
knem_iov.base = (uintptr_t) data_ptr; rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
knem_iov.len = total_size; if (OPAL_UNLIKELY(rc < 0)) {
MCA_BTL_VADER_FRAG_RETURN(frag);
return NULL;
}
knem_cr.iovec_array = (uintptr_t) &knem_iov; frag->segments[0].seg_len = *size + reserve;
knem_cr.iovec_nr = 1; } else {
knem_cr.protection = PROT_READ | PROT_WRITE; if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
/* Vader will explicitly destroy this cookie */ if (OPAL_LIKELY(total_size <= mca_btl_vader.super.btl_eager_limit)) {
knem_cr.flags = 0; (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);
if (OPAL_UNLIKELY(ioctl(mca_btl_vader.knem_fd, KNEM_CMD_CREATE_REGION, &knem_cr) < 0)) { } else {
MCA_BTL_VADER_FRAG_RETURN(frag); (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
return NULL; }
} else
(void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
#if OPAL_BTL_VADER_HAVE_XPMEM
/* use xpmem to send this segment if it is above the max inline send size */
if (OPAL_UNLIKELY(MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism &&
total_size > (size_t) mca_btl_vader_component.max_inline_send)) {
/* single copy send */
frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;
/* set up single copy io vector */
frag->hdr->sc_iov.iov_base = data_ptr;
frag->hdr->sc_iov.iov_len = *size;
frag->segments[0].seg_len = reserve;
frag->segments[1].seg_len = *size;
frag->segments[1].seg_addr.pval = data_ptr;
frag->base.des_segment_count = 2;
} else {
#endif
/* inline send */
if (OPAL_LIKELY(MCA_BTL_DES_FLAGS_BTL_OWNERSHIP & flags)) {
/* try to reserve a fast box for this transfer only if the
* fragment does not belong to the caller */
fbox = mca_btl_vader_reserve_fbox (endpoint, total_size);
if (OPAL_LIKELY(fbox)) {
frag->segments[0].seg_addr.pval = fbox;
}
frag->fbox = fbox;
} }
frag->segments[0].cookie = knem_cr.cookie; /* NTH: the covertor adds some latency so we bypass it here */
frag->segments[0].registered_base = (intptr_t) data_ptr; memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve), data_ptr, *size);
frag->cookie = knem_cr.cookie; frag->segments[0].seg_len = total_size;
#if OPAL_BTL_VADER_HAVE_XPMEM
} }
#endif /* OPAL_BTL_SM_HAVE_KNEM */ #endif
} }
frag->base.order = order; frag->base.order = order;

Просмотреть файл

@ -35,47 +35,38 @@
* @param descriptor (IN) Description of the data to be transferred * @param descriptor (IN) Description of the data to be transferred
*/ */
#if OPAL_BTL_VADER_HAVE_XPMEM #if OPAL_BTL_VADER_HAVE_XPMEM
int mca_btl_vader_put_xpmem (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_xpmem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
mca_btl_base_segment_t *src = des->des_local;
mca_btl_base_segment_t *dst = des->des_remote;
const size_t size = min(dst->seg_len, src->seg_len);
mca_mpool_base_registration_t *reg; mca_mpool_base_registration_t *reg;
void *rem_ptr; void *rem_ptr;
reg = vader_get_registation (endpoint, dst->seg_addr.pval, dst->seg_len, 0, &rem_ptr); reg = vader_get_registation (endpoint, (void *)(intptr_t) remote_address, size, 0, &rem_ptr);
if (OPAL_UNLIKELY(NULL == reg)) { if (OPAL_UNLIKELY(NULL == reg)) {
return OPAL_ERROR; return OPAL_ERROR;
} }
vader_memmove (rem_ptr, src->seg_addr.pval, size); vader_memmove (rem_ptr, local_address, size);
vader_return_registration (reg, endpoint); vader_return_registration (reg, endpoint);
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#endif #endif
#if OPAL_BTL_VADER_HAVE_CMA #if OPAL_BTL_VADER_HAVE_CMA
int mca_btl_vader_put_cma (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_cma (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des; struct iovec src_iov = {.iov_base = local_address, .iov_len = size};
mca_btl_base_segment_t *src = des->des_local; struct iovec dst_iov = {.iov_base = (void *)(intptr_t) remote_address, .iov_len = size};
mca_btl_base_segment_t *dst = des->des_remote;
const size_t size = min(dst->seg_len, src->seg_len);
struct iovec src_iov = {.iov_base = src->seg_addr.pval, .iov_len = size};
struct iovec dst_iov = {.iov_base = dst->seg_addr.pval, .iov_len = size};
ssize_t ret; ssize_t ret;
ret = process_vm_writev (endpoint->segment_data.other.seg_ds->seg_cpid, &src_iov, 1, &dst_iov, 1, 0); ret = process_vm_writev (endpoint->segment_data.other.seg_ds->seg_cpid, &src_iov, 1, &dst_iov, 1, 0);
@ -85,36 +76,29 @@ int mca_btl_vader_put_cma (struct mca_btl_base_module_t *btl,
} }
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
#endif #endif
#if OPAL_BTL_VADER_HAVE_KNEM #if OPAL_BTL_VADER_HAVE_KNEM
int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl, int mca_btl_vader_put_knem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *local_address,
struct mca_btl_base_endpoint_t *endpoint, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_descriptor_t *des) mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) des;
mca_btl_vader_segment_t *src = (mca_btl_vader_segment_t *) des->des_local;
mca_btl_vader_segment_t *dst = (mca_btl_vader_segment_t *) des->des_remote;
const size_t size = min(dst->base.seg_len, src->base.seg_len);
intptr_t offset = dst->base.seg_addr.lval - dst->registered_base;
struct knem_cmd_param_iovec send_iovec; struct knem_cmd_param_iovec send_iovec;
struct knem_cmd_inline_copy icopy; struct knem_cmd_inline_copy icopy;
/* Fill in the ioctl data fields. There's no async completion, so /* Fill in the ioctl data fields. There's no async completion, so
we don't need to worry about getting a slot, etc. */ we don't need to worry about getting a slot, etc. */
send_iovec.base = (uintptr_t) src->base.seg_addr.lval; send_iovec.base = (uintptr_t) local_address;
send_iovec.len = size; send_iovec.len = size;
icopy.local_iovec_array = (uintptr_t) &send_iovec; icopy.local_iovec_array = (uintptr_t) &send_iovec;
icopy.local_iovec_nr = 1; icopy.local_iovec_nr = 1;
icopy.remote_cookie = dst->cookie; icopy.remote_cookie = remote_handle->cookie;
icopy.remote_offset = offset; icopy.remote_offset = remote_address - remote_handle->base_addr;
icopy.write = 1; icopy.write = 1;
icopy.flags = 0; icopy.flags = 0;
@ -122,7 +106,7 @@ int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl,
* is greater than the cutoff. Note that if DMA is not supported * is greater than the cutoff. Note that if DMA is not supported
* or the user specified 0 for knem_dma_min the knem_dma_min was * or the user specified 0 for knem_dma_min the knem_dma_min was
* set to UINT_MAX in mca_btl_vader_knem_init. */ * set to UINT_MAX in mca_btl_vader_knem_init. */
if (mca_btl_vader_component.knem_dma_min <= dst->base.seg_len) { if (mca_btl_vader_component.knem_dma_min <= size) {
icopy.flags = KNEM_FLAG_DMA; icopy.flags = KNEM_FLAG_DMA;
} }
/* synchronous flags only, no need to specify icopy.async_status_index */ /* synchronous flags only, no need to specify icopy.async_status_index */
@ -138,10 +122,7 @@ int mca_btl_vader_put_knem (struct mca_btl_base_module_t *btl,
} }
/* always call the callback function */ /* always call the callback function */
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
frag->endpoint = endpoint;
mca_btl_vader_frag_complete (frag);
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -40,7 +40,7 @@ int mca_btl_vader_send (struct mca_btl_base_module_t *btl,
mca_btl_base_tag_t tag) mca_btl_base_tag_t tag)
{ {
mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor; mca_btl_vader_frag_t *frag = (mca_btl_vader_frag_t *) descriptor;
const size_t total_size = frag->segments[0].base.seg_len; const size_t total_size = frag->segments[0].seg_len;
if (OPAL_LIKELY(frag->fbox)) { if (OPAL_LIKELY(frag->fbox)) {
mca_btl_vader_fbox_send (frag->fbox, tag); mca_btl_vader_fbox_send (frag->fbox, tag);

Просмотреть файл

@ -12,7 +12,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2014 Los Alamos National Security, LLC. All rights * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -49,7 +49,10 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
/* don't attempt sendi if there are pending fragments on the endpoint */ /* don't attempt sendi if there are pending fragments on the endpoint */
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->pending_frags))) { if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->pending_frags))) {
*descriptor = NULL; if (descriptor) {
*descriptor = NULL;
}
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
@ -68,7 +71,9 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, length, frag = (mca_btl_vader_frag_t *) mca_btl_vader_alloc (btl, endpoint, order, length,
flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if (OPAL_UNLIKELY(NULL == frag)) { if (OPAL_UNLIKELY(NULL == frag)) {
*descriptor = NULL; if (descriptor) {
*descriptor = NULL;
}
return OPAL_ERR_OUT_OF_RESOURCE; return OPAL_ERR_OUT_OF_RESOURCE;
} }
@ -78,7 +83,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
frag->hdr->tag = tag; frag->hdr->tag = tag;
/* write the match header (with MPI comm/tag/etc. info) */ /* write the match header (with MPI comm/tag/etc. info) */
memcpy (frag->segments[0].base.seg_addr.pval, header, header_size); memcpy (frag->segments[0].seg_addr.pval, header, header_size);
/* write the message data if there is any */ /* write the message data if there is any */
/* we can't use single-copy semantics here since as caller will consider the send /* we can't use single-copy semantics here since as caller will consider the send
@ -88,7 +93,7 @@ int mca_btl_vader_sendi (struct mca_btl_base_module_t *btl,
struct iovec iov; struct iovec iov;
/* pack the data into the supplied buffer */ /* pack the data into the supplied buffer */
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].base.seg_addr.pval + header_size); iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
iov.iov_len = length = payload_size; iov.iov_len = length = payload_size;
(void) opal_convertor_pack (convertor, &iov, &iov_count, &length); (void) opal_convertor_pack (convertor, &iov, &iov_count, &length);