btl/self: rewrite to decrease memory usage (#2307)
This commit rewrites much of the btl/self component to fix a long-standing memory usage bug. Before this commit the prepare_src path would always allocate a max send fragment (256kB). This caused the rank to allocate 32 * 256k useless buffers from one send. This commit makes the following changes: - Add the MCA_BTL_FLAGS_GET flag by default. No reason not to set it. - Reduce the eager limit, max send size, buffers per allocation, and maximum buffer count per fragment size. These changes should have no noticeable effect on performance but should greatly reduce the memory usage of the component. - Implement the sendi function. This should reduce self send latency somewhat. - Rewrite prepare_src to never allocate an eager or max send fragment for contiguous data. - add_procs needs to return something in the peer array for the proc self, not just set the reachability bit. Now stores (void *) 1. - Various cleanups. Removed an unused file. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
83e3323646
Коммит
9d92075e60
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2013 Inria. All rights reserved.
|
||||
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -24,68 +24,55 @@
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "opal/mca/btl/btl.h"
|
||||
#include "opal/mca/mpool/base/base.h"
|
||||
#include "btl_self.h"
|
||||
#include "btl_self_frag.h"
|
||||
#include "opal/util/proc.h"
|
||||
|
||||
static int mca_btl_self_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
|
||||
|
||||
static int mca_btl_self_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
|
||||
|
||||
mca_btl_base_module_t mca_btl_self = {
|
||||
.btl_component = &mca_btl_self_component.super,
|
||||
.btl_add_procs = mca_btl_self_add_procs,
|
||||
.btl_del_procs = mca_btl_self_del_procs,
|
||||
.btl_finalize = mca_btl_self_finalize,
|
||||
.btl_alloc = mca_btl_self_alloc,
|
||||
.btl_free = mca_btl_self_free,
|
||||
.btl_prepare_src = mca_btl_self_prepare_src,
|
||||
.btl_send = mca_btl_self_send,
|
||||
.btl_put = mca_btl_self_put,
|
||||
.btl_get = mca_btl_self_get,
|
||||
.btl_dump = mca_btl_base_dump,
|
||||
.btl_ft_event = mca_btl_self_ft_event,
|
||||
};
|
||||
|
||||
|
||||
int mca_btl_self_add_procs( struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers,
|
||||
opal_bitmap_t* reachability )
|
||||
/**
|
||||
* PML->BTL notification of change in the process list.
|
||||
* PML->BTL Notification that a receive fragment has been matched.
|
||||
* Called for message that is send from process with the virtual
|
||||
* address of the shared memory segment being different than that of
|
||||
* the receiver.
|
||||
*
|
||||
* @param btl (IN)
|
||||
* @param proc (IN)
|
||||
* @param peer (OUT)
|
||||
* @return OPAL_SUCCESS or error status on failure.
|
||||
*
|
||||
*/
|
||||
static int mca_btl_self_add_procs (struct mca_btl_base_module_t *btl, size_t nprocs,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers,
|
||||
opal_bitmap_t* reachability)
|
||||
{
|
||||
int i;
|
||||
|
||||
for( i = 0; i < (int)nprocs; i++ ) {
|
||||
for (int i = 0; i < (int)nprocs; i++ ) {
|
||||
if( 0 == opal_compare_proc(procs[i]->proc_name, OPAL_PROC_MY_NAME) ) {
|
||||
opal_bitmap_set_bit( reachability, i );
|
||||
/* need to return something to keep the bml from ignoring us */
|
||||
peers[i] = (struct mca_btl_base_endpoint_t *) 1;
|
||||
break; /* there will always be only one ... */
|
||||
}
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int mca_btl_self_del_procs( struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers )
|
||||
/**
|
||||
* PML->BTL notification of change in the process list.
|
||||
*
|
||||
* @param btl (IN) BTL instance
|
||||
* @param proc (IN) Peer process
|
||||
* @param peer (IN) Peer addressing information.
|
||||
* @return Status indicating if cleanup was successful
|
||||
*
|
||||
*/
|
||||
static int mca_btl_self_del_procs (struct mca_btl_base_module_t *btl, size_t nprocs,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers)
|
||||
{
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -104,7 +91,7 @@ int mca_btl_self_del_procs( struct mca_btl_base_module_t* btl,
|
||||
*
|
||||
*/
|
||||
|
||||
int mca_btl_self_finalize(struct mca_btl_base_module_t* btl)
|
||||
static int mca_btl_self_finalize(struct mca_btl_base_module_t* btl)
|
||||
{
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -116,29 +103,29 @@ int mca_btl_self_finalize(struct mca_btl_base_module_t* btl)
|
||||
* @param btl (IN) BTL module
|
||||
* @param size (IN) Request segment size.
|
||||
*/
|
||||
mca_btl_base_descriptor_t* mca_btl_self_alloc(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t order,
|
||||
size_t size,
|
||||
uint32_t flags)
|
||||
static mca_btl_base_descriptor_t *mca_btl_self_alloc (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags)
|
||||
{
|
||||
mca_btl_self_frag_t* frag = NULL;
|
||||
mca_btl_self_frag_t *frag = NULL;
|
||||
|
||||
if(size <= mca_btl_self.btl_eager_limit) {
|
||||
if (size <= MCA_BTL_SELF_MAX_INLINE_SIZE) {
|
||||
MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag);
|
||||
} else if (size <= mca_btl_self.btl_eager_limit) {
|
||||
MCA_BTL_SELF_FRAG_ALLOC_EAGER(frag);
|
||||
} else if (size <= btl->btl_max_send_size) {
|
||||
MCA_BTL_SELF_FRAG_ALLOC_SEND(frag);
|
||||
}
|
||||
|
||||
if( OPAL_UNLIKELY(NULL == frag) ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segment.seg_len = size;
|
||||
frag->base.des_flags = flags;
|
||||
frag->base.des_segments = &(frag->segment);
|
||||
frag->segments[0].seg_len = size;
|
||||
frag->base.des_segment_count = 1;
|
||||
return (mca_btl_base_descriptor_t*)frag;
|
||||
frag->base.des_flags = flags;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -147,90 +134,57 @@ mca_btl_base_descriptor_t* mca_btl_self_alloc(
|
||||
* @param btl (IN) BTL module
|
||||
* @param segment (IN) Allocated segment.
|
||||
*/
|
||||
int mca_btl_self_free( struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des )
|
||||
static int mca_btl_self_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des)
|
||||
{
|
||||
mca_btl_self_frag_t* frag = (mca_btl_self_frag_t*)des;
|
||||
MCA_BTL_SELF_FRAG_RETURN((mca_btl_self_frag_t *) des);
|
||||
|
||||
frag->base.des_segments = NULL;
|
||||
frag->base.des_segment_count = 0;
|
||||
|
||||
if(frag->size == mca_btl_self.btl_eager_limit) {
|
||||
MCA_BTL_SELF_FRAG_RETURN_EAGER(frag);
|
||||
} else if (frag->size == mca_btl_self.btl_max_send_size) {
|
||||
MCA_BTL_SELF_FRAG_RETURN_SEND(frag);
|
||||
} else {
|
||||
MCA_BTL_SELF_FRAG_RETURN_RDMA(frag);
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Prepare data for send/put
|
||||
* Prepare data for send
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
*/
|
||||
struct mca_btl_base_descriptor_t*
|
||||
mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct opal_convertor_t* convertor,
|
||||
uint8_t order,
|
||||
size_t reserve,
|
||||
size_t* size,
|
||||
uint32_t flags )
|
||||
static struct mca_btl_base_descriptor_t *mca_btl_self_prepare_src (struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct opal_convertor_t *convertor,
|
||||
uint8_t order, size_t reserve,
|
||||
size_t *size, uint32_t flags)
|
||||
{
|
||||
mca_btl_self_frag_t* frag;
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
size_t max_data = *size;
|
||||
int rc;
|
||||
bool inline_send = !opal_convertor_need_buffers(convertor);
|
||||
size_t buffer_len = reserve + (inline_send ? 0 : *size);
|
||||
mca_btl_self_frag_t *frag;
|
||||
|
||||
frag = (mca_btl_self_frag_t *) mca_btl_self_alloc (btl, endpoint, order, buffer_len, flags);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* non-contigous data */
|
||||
if( opal_convertor_need_buffers(convertor) ||
|
||||
max_data < mca_btl_self.btl_max_send_size ||
|
||||
reserve != 0 ) {
|
||||
if (OPAL_UNLIKELY(!inline_send)) {
|
||||
struct iovec iov = {.iov_len = *size, .iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->data + reserve)};
|
||||
size_t max_data = *size;
|
||||
uint32_t iov_count = 1;
|
||||
int rc;
|
||||
|
||||
MCA_BTL_SELF_FRAG_ALLOC_SEND(frag);
|
||||
if(OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(reserve + max_data > frag->size) {
|
||||
max_data = frag->size - reserve;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (IOVBASE_TYPE*)((unsigned char*)(frag+1) + reserve);
|
||||
|
||||
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
rc = opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
|
||||
if(rc < 0) {
|
||||
MCA_BTL_SELF_FRAG_RETURN_SEND(frag);
|
||||
mca_btl_self_free (btl, &frag->base);
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_addr.pval = frag+1;
|
||||
frag->segment.seg_len = reserve + max_data;
|
||||
|
||||
*size = max_data;
|
||||
} else {
|
||||
MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag);
|
||||
if(OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
void *data_ptr;
|
||||
|
||||
/* convertor should return offset into users buffer */
|
||||
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
if(rc < 0) {
|
||||
MCA_BTL_SELF_FRAG_RETURN_RDMA(frag);
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_addr.lval = (uint64_t)(uintptr_t) iov.iov_base;
|
||||
frag->segment.seg_len = max_data;
|
||||
*size = max_data;
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
frag->segments[1].seg_addr.pval = data_ptr;
|
||||
frag->segments[1].seg_len = *size;
|
||||
frag->base.des_segment_count = 2;
|
||||
}
|
||||
frag->base.des_flags = flags;
|
||||
frag->base.des_segments = &frag->segment;
|
||||
frag->base.des_segment_count = 1;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
@ -242,10 +196,10 @@ mca_btl_self_prepare_src( struct mca_btl_base_module_t* btl,
|
||||
* @param peer (IN) BTL peer addressing
|
||||
*/
|
||||
|
||||
int mca_btl_self_send( struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_tag_t tag )
|
||||
static int mca_btl_self_send (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des,
|
||||
mca_btl_base_tag_t tag)
|
||||
{
|
||||
mca_btl_active_message_callback_t* reg;
|
||||
int btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
@ -264,6 +218,39 @@ int mca_btl_self_send( struct mca_btl_base_module_t* btl,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int mca_btl_self_sendi (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct opal_convertor_t *convertor, void *header, size_t header_size,
|
||||
size_t payload_size, uint8_t order, uint32_t flags, mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t **descriptor)
|
||||
{
|
||||
mca_btl_base_descriptor_t *frag;
|
||||
|
||||
if (!payload_size || !opal_convertor_need_buffers(convertor)) {
|
||||
void *data_ptr = NULL;
|
||||
if (payload_size) {
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
}
|
||||
|
||||
mca_btl_base_segment_t segments[2] = {{.seg_addr.pval = header, .seg_len = header_size},
|
||||
{.seg_addr.pval = data_ptr, .seg_len = payload_size}};
|
||||
mca_btl_base_descriptor_t des = {.des_segments = segments, .des_segment_count = payload_size ? 2 : 1,
|
||||
.des_flags = 0};
|
||||
|
||||
(void) mca_btl_self_send (btl, endpoint, &des, tag);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
frag = mca_btl_self_prepare_src (btl, endpoint, convertor, order, header_size, &payload_size,
|
||||
flags | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
if (NULL == frag) {
|
||||
*descriptor = NULL;
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
memcpy (frag->des_segments[0].seg_addr.pval, header, header_size);
|
||||
(void) mca_btl_self_send (btl, endpoint, frag, tag);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int mca_btl_self_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
|
||||
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
|
||||
@ -289,22 +276,23 @@ static int mca_btl_self_get (mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_self_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
static int mca_btl_self_ft_event(int state) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/* btl self module */
|
||||
mca_btl_base_module_t mca_btl_self = {
|
||||
.btl_component = &mca_btl_self_component.super,
|
||||
.btl_add_procs = mca_btl_self_add_procs,
|
||||
.btl_del_procs = mca_btl_self_del_procs,
|
||||
.btl_finalize = mca_btl_self_finalize,
|
||||
.btl_alloc = mca_btl_self_alloc,
|
||||
.btl_free = mca_btl_self_free,
|
||||
.btl_prepare_src = mca_btl_self_prepare_src,
|
||||
.btl_send = mca_btl_self_send,
|
||||
.btl_sendi = mca_btl_self_sendi,
|
||||
.btl_put = mca_btl_self_put,
|
||||
.btl_get = mca_btl_self_get,
|
||||
.btl_dump = mca_btl_base_dump,
|
||||
.btl_ft_event = mca_btl_self_ft_event,
|
||||
};
|
||||
|
@ -10,7 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -30,12 +30,14 @@
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif /* HAVE_SYS_TYPES_H */
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "opal/mca/btl/btl.h"
|
||||
#include "opal/mca/btl/base/base.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define MCA_BTL_SELF_MAX_INLINE_SIZE 128
|
||||
|
||||
/**
|
||||
* Shared Memory (SELF) BTL module.
|
||||
*/
|
||||
@ -44,7 +46,6 @@ struct mca_btl_self_component_t {
|
||||
int free_list_num; /**< initial size of free lists */
|
||||
int free_list_max; /**< maximum size of free lists */
|
||||
int free_list_inc; /**< number of elements to alloc when growing free lists */
|
||||
opal_mutex_t self_lock;
|
||||
opal_free_list_t self_frags_eager; /**< free list of self first */
|
||||
opal_free_list_t self_frags_send; /**< free list of self second */
|
||||
opal_free_list_t self_frags_rdma; /**< free list of self second */
|
||||
@ -52,146 +53,8 @@ struct mca_btl_self_component_t {
|
||||
typedef struct mca_btl_self_component_t mca_btl_self_component_t;
|
||||
OPAL_MODULE_DECLSPEC extern mca_btl_self_component_t mca_btl_self_component;
|
||||
|
||||
/**
|
||||
* Register shared memory module parameters with the MCA framework
|
||||
*/
|
||||
int mca_btl_self_component_open(void);
|
||||
|
||||
/**
|
||||
* Any final cleanup before being unloaded.
|
||||
*/
|
||||
int mca_btl_self_component_close(void);
|
||||
|
||||
/**
|
||||
* SELF module initialization.
|
||||
*
|
||||
* @param num_btls (OUT) Number of BTLs returned in BTL array.
|
||||
* @param enable_progress_threads (IN) Flag indicating whether BTL is allowed to have progress threads
|
||||
* @param enable_mpi_threads (IN) Flag indicating whether BTL must support multiple simultaneous invocations from different threads
|
||||
*
|
||||
*/
|
||||
mca_btl_base_module_t** mca_btl_self_component_init(
|
||||
int *num_btls,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads
|
||||
);
|
||||
|
||||
extern mca_btl_base_module_t mca_btl_self;
|
||||
|
||||
|
||||
/**
|
||||
* Cleanup any resources held by the BTL.
|
||||
*
|
||||
* @param btl BTL instance.
|
||||
* @return OPAL_SUCCESS or error status on failure.
|
||||
*/
|
||||
|
||||
int mca_btl_self_finalize(
|
||||
struct mca_btl_base_module_t* btl
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* PML->BTL notification of change in the process list.
|
||||
* PML->BTL Notification that a receive fragment has been matched.
|
||||
* Called for message that is send from process with the virtual
|
||||
* address of the shared memory segment being different than that of
|
||||
* the receiver.
|
||||
*
|
||||
* @param btl (IN)
|
||||
* @param proc (IN)
|
||||
* @param peer (OUT)
|
||||
* @return OPAL_SUCCESS or error status on failure.
|
||||
*
|
||||
*/
|
||||
|
||||
int mca_btl_self_add_procs(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t** peers,
|
||||
struct opal_bitmap_t* reachability
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* PML->BTL notification of change in the process list.
|
||||
*
|
||||
* @param btl (IN) BTL instance
|
||||
* @param proc (IN) Peer process
|
||||
* @param peer (IN) Peer addressing information.
|
||||
* @return Status indicating if cleanup was successful
|
||||
*
|
||||
*/
|
||||
int mca_btl_self_del_procs(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* Allocate a segment.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param size (IN) Request segment size.
|
||||
*/
|
||||
mca_btl_base_descriptor_t* mca_btl_self_alloc(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t order,
|
||||
size_t size,
|
||||
uint32_t flags
|
||||
);
|
||||
|
||||
/**
|
||||
* Return a segment allocated by this BTL.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param segment (IN) Allocated segment.
|
||||
*/
|
||||
int mca_btl_self_free(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* segment
|
||||
);
|
||||
|
||||
/**
|
||||
* Pack data
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param peer (IN) BTL peer addressing
|
||||
*/
|
||||
struct mca_btl_base_descriptor_t* mca_btl_self_prepare_src(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct opal_convertor_t* convertor,
|
||||
uint8_t order,
|
||||
size_t reserve,
|
||||
size_t* size,
|
||||
uint32_t flags
|
||||
);
|
||||
|
||||
/**
|
||||
* Initiate a send to the peer.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param peer (IN) BTL peer addressing
|
||||
*/
|
||||
int mca_btl_self_send(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
mca_btl_base_tag_t tag
|
||||
);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OPAL_SUCCESS or failure status
|
||||
*/
|
||||
int mca_btl_self_ft_event(int state);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -11,7 +11,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -20,20 +20,25 @@
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "opal_config.h"
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#include <string.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif /* HAVE_SYS_TYPES_H */
|
||||
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
#include "btl_self.h"
|
||||
#include "btl_self_frag.h"
|
||||
|
||||
static int mca_btl_self_component_register(void);
|
||||
static int mca_btl_self_component_open(void);
|
||||
static int mca_btl_self_component_close(void);
|
||||
|
||||
/**
|
||||
* SELF module initialization.
|
||||
*
|
||||
* @param num_btls (OUT) Number of BTLs returned in BTL array.
|
||||
* @param enable_progress_threads (IN) Flag indicating whether BTL is allowed to have progress threads
|
||||
* @param enable_mpi_threads (IN) Flag indicating whether BTL must support multiple simultaneous invocations from different threads
|
||||
*
|
||||
*/
|
||||
static mca_btl_base_module_t **mca_btl_self_component_init (int *num_btls,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads);
|
||||
|
||||
/*
|
||||
* Shared Memory (SELF) component instance.
|
||||
@ -76,14 +81,15 @@ static int mca_btl_self_component_register(void)
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_btl_self_component.free_list_num);
|
||||
mca_btl_self_component.free_list_max = -1;
|
||||
/* NTH: free list buffers are not released until we tear down so DO NOT make them unlimited here */
|
||||
mca_btl_self_component.free_list_max = 64;
|
||||
(void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_max",
|
||||
"Maximum number of fragments",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_btl_self_component.free_list_max);
|
||||
mca_btl_self_component.free_list_inc = 32;
|
||||
mca_btl_self_component.free_list_inc = 8;
|
||||
(void) mca_base_component_var_register(&mca_btl_self_component.super.btl_version, "free_list_inc",
|
||||
"Increment by this number of fragments",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
@ -92,25 +98,23 @@ static int mca_btl_self_component_register(void)
|
||||
&mca_btl_self_component.free_list_inc);
|
||||
|
||||
mca_btl_self.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
|
||||
mca_btl_self.btl_eager_limit = 128 * 1024;
|
||||
mca_btl_self.btl_eager_limit = 1024;
|
||||
mca_btl_self.btl_rndv_eager_limit = 128 * 1024;
|
||||
mca_btl_self.btl_max_send_size = 256 * 1024;
|
||||
mca_btl_self.btl_max_send_size = 16 * 1024;
|
||||
mca_btl_self.btl_rdma_pipeline_send_length = INT_MAX;
|
||||
mca_btl_self.btl_rdma_pipeline_frag_size = INT_MAX;
|
||||
mca_btl_self.btl_min_rdma_pipeline_size = 0;
|
||||
mca_btl_self.btl_flags = MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
|
||||
mca_btl_self.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND;
|
||||
mca_btl_self.btl_bandwidth = 100;
|
||||
mca_btl_self.btl_latency = 0;
|
||||
mca_btl_base_param_register(&mca_btl_self_component.super.btl_version,
|
||||
&mca_btl_self);
|
||||
mca_btl_base_param_register (&mca_btl_self_component.super.btl_version, &mca_btl_self);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_self_component_open(void)
|
||||
static int mca_btl_self_component_open(void)
|
||||
{
|
||||
/* initialize objects */
|
||||
OBJ_CONSTRUCT(&mca_btl_self_component.self_lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_eager, opal_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_send, opal_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_self_component.self_frags_rdma, opal_free_list_t);
|
||||
@ -123,64 +127,67 @@ int mca_btl_self_component_open(void)
|
||||
* component cleanup - sanity checking of queue lengths
|
||||
*/
|
||||
|
||||
int mca_btl_self_component_close(void)
|
||||
static int mca_btl_self_component_close(void)
|
||||
{
|
||||
OBJ_DESTRUCT(&mca_btl_self_component.self_lock);
|
||||
OBJ_DESTRUCT(&mca_btl_self_component.self_frags_eager);
|
||||
OBJ_DESTRUCT(&mca_btl_self_component.self_frags_send);
|
||||
OBJ_DESTRUCT(&mca_btl_self_component.self_frags_rdma);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* SELF component initialization
|
||||
*/
|
||||
mca_btl_base_module_t** mca_btl_self_component_init( int *num_btls,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads )
|
||||
static mca_btl_base_module_t **mca_btl_self_component_init (int *num_btls,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
mca_btl_base_module_t **btls = NULL;
|
||||
*num_btls = 0;
|
||||
int ret;
|
||||
|
||||
/* allocate the Shared Memory PTL */
|
||||
*num_btls = 1;
|
||||
btls = (mca_btl_base_module_t**)malloc((*num_btls)*sizeof(mca_btl_base_module_t*));
|
||||
/* initialize free lists */
|
||||
ret = opal_free_list_init (&mca_btl_self_component.self_frags_eager,
|
||||
sizeof (mca_btl_self_frag_eager_t) + mca_btl_self.btl_eager_limit,
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_self_frag_eager_t), 0,
|
||||
opal_cache_line_size, mca_btl_self_component.free_list_num,
|
||||
mca_btl_self_component.free_list_max,
|
||||
mca_btl_self_component.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = opal_free_list_init (&mca_btl_self_component.self_frags_send,
|
||||
sizeof (mca_btl_self_frag_send_t) + mca_btl_self.btl_max_send_size,
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_self_frag_send_t), 0,
|
||||
opal_cache_line_size, mca_btl_self_component.free_list_num,
|
||||
mca_btl_self_component.free_list_max,
|
||||
mca_btl_self_component.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = opal_free_list_init (&mca_btl_self_component.self_frags_rdma,
|
||||
sizeof (mca_btl_self_frag_rdma_t) + MCA_BTL_SELF_MAX_INLINE_SIZE,
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_self_frag_rdma_t), 0,
|
||||
opal_cache_line_size, mca_btl_self_component.free_list_num,
|
||||
mca_btl_self_component.free_list_max,
|
||||
mca_btl_self_component.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* get pointer to the btls */
|
||||
btls = (mca_btl_base_module_t **) malloc (sizeof (mca_btl_base_module_t *));
|
||||
if (NULL == btls) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initialize free lists */
|
||||
opal_free_list_init (&mca_btl_self_component.self_frags_eager,
|
||||
sizeof(mca_btl_self_frag_eager_t) + mca_btl_self.btl_eager_limit,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_self_frag_eager_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_self_component.free_list_num,
|
||||
mca_btl_self_component.free_list_max,
|
||||
mca_btl_self_component.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL);
|
||||
opal_free_list_init (&mca_btl_self_component.self_frags_send,
|
||||
sizeof(mca_btl_self_frag_send_t) + mca_btl_self.btl_max_send_size,
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_self_frag_send_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_self_component.free_list_num,
|
||||
mca_btl_self_component.free_list_max,
|
||||
mca_btl_self_component.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL);
|
||||
opal_free_list_init (&mca_btl_self_component.self_frags_rdma,
|
||||
sizeof(mca_btl_self_frag_rdma_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(mca_btl_self_frag_rdma_t),
|
||||
0,opal_cache_line_size,
|
||||
mca_btl_self_component.free_list_num,
|
||||
mca_btl_self_component.free_list_max,
|
||||
mca_btl_self_component.free_list_inc,
|
||||
NULL, 0, NULL, NULL, NULL);
|
||||
btls[0] = &mca_btl_self;
|
||||
*num_btls = 1;
|
||||
|
||||
/* get pointer to the btls */
|
||||
btls[0] = (mca_btl_base_module_t *)(&mca_btl_self);
|
||||
return btls;
|
||||
}
|
||||
|
||||
|
@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef MCA_BTL_SELF_ENDPOINT_H
|
||||
#define MCA_BTL_SELF_ENDPOINT_H
|
||||
|
||||
#if OPAL_ENABLE_PROGRESS_THREADS == 1
|
||||
#include "opal/mca/event/event.h"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* An abstraction that represents a connection to a endpoint process.
|
||||
* An instance of mca_ptl_base_endpoint_t is associated w/ each process
|
||||
* and BTL pair at startup.
|
||||
*/
|
||||
|
||||
struct mca_btl_base_endpoint_t {
|
||||
int my_selfp_rank; /**< My SELFP process rank. Used for accessing
|
||||
* SELFP specfic data structures. */
|
||||
int peer_selfp_rank; /**< My peer's SELFP process rank. Used for accessing
|
||||
* SELFP specfic data structures. */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,3 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
@ -9,6 +10,8 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,31 +24,32 @@
|
||||
|
||||
static inline void mca_btl_self_frag_constructor(mca_btl_self_frag_t* frag)
|
||||
{
|
||||
frag->segment.seg_addr.pval = frag+1;
|
||||
frag->segment.seg_len = (uint32_t)frag->size;
|
||||
frag->base.des_segments = &frag->segment;
|
||||
frag->base.des_segment_count = 1;
|
||||
frag->base.des_flags = 0;
|
||||
frag->base.des_flags = 0;
|
||||
frag->segments[0].seg_addr.pval = (void *) frag->data;
|
||||
frag->segments[0].seg_len = (uint32_t) frag->size;
|
||||
frag->base.des_segments = frag->segments;
|
||||
frag->base.des_segment_count = 1;
|
||||
}
|
||||
|
||||
static void mca_btl_self_frag_eager_constructor(mca_btl_self_frag_t* frag)
|
||||
{
|
||||
frag->list = &mca_btl_self_component.self_frags_eager;
|
||||
frag->size = mca_btl_self.btl_eager_limit;
|
||||
mca_btl_self_frag_constructor(frag);
|
||||
}
|
||||
|
||||
static void mca_btl_self_frag_send_constructor(mca_btl_self_frag_t* frag)
|
||||
{
|
||||
frag->list = &mca_btl_self_component.self_frags_send;
|
||||
frag->size = mca_btl_self.btl_max_send_size;
|
||||
mca_btl_self_frag_constructor(frag);
|
||||
}
|
||||
|
||||
static void mca_btl_self_frag_rdma_constructor(mca_btl_self_frag_t* frag)
|
||||
{
|
||||
frag->size = 0;
|
||||
frag->segment.seg_addr.pval = frag+1;
|
||||
frag->segment.seg_len = (uint32_t)frag->size;
|
||||
frag->base.des_flags = 0;
|
||||
frag->list = &mca_btl_self_component.self_frags_rdma;
|
||||
frag->size = MCA_BTL_SELF_MAX_INLINE_SIZE;
|
||||
mca_btl_self_frag_constructor(frag);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE( mca_btl_self_frag_eager_t,
|
||||
|
@ -33,9 +33,11 @@
|
||||
*/
|
||||
struct mca_btl_self_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_base_segment_t segment;
|
||||
mca_btl_base_segment_t segments[2];
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
opal_free_list_t *list;
|
||||
size_t size;
|
||||
unsigned char data[];
|
||||
};
|
||||
typedef struct mca_btl_self_frag_t mca_btl_self_frag_t;
|
||||
typedef struct mca_btl_self_frag_t mca_btl_self_frag_eager_t;
|
||||
@ -47,43 +49,27 @@ OBJ_CLASS_DECLARATION(mca_btl_self_frag_send_t);
|
||||
OBJ_CLASS_DECLARATION(mca_btl_self_frag_rdma_t);
|
||||
|
||||
#define MCA_BTL_SELF_FRAG_ALLOC_EAGER(frag) \
|
||||
{ \
|
||||
frag = (mca_btl_self_frag_t *) \
|
||||
opal_free_list_get (&mca_btl_self_component.self_frags_eager); \
|
||||
}
|
||||
{ \
|
||||
frag = (mca_btl_self_frag_t *) \
|
||||
opal_free_list_get (&mca_btl_self_component.self_frags_eager); \
|
||||
}
|
||||
|
||||
#define MCA_BTL_SELF_FRAG_RETURN_EAGER(frag) \
|
||||
{ \
|
||||
opal_free_list_return (&mca_btl_self_component.self_frags_eager, \
|
||||
(opal_free_list_item_t*)(frag)); \
|
||||
frag->segment.seg_addr.pval = frag+1; \
|
||||
}
|
||||
|
||||
#define MCA_BTL_SELF_FRAG_ALLOC_SEND(frag) \
|
||||
{ \
|
||||
frag = (mca_btl_self_frag_t *) \
|
||||
opal_free_list_get (&mca_btl_self_component.self_frags_send); \
|
||||
}
|
||||
|
||||
#define MCA_BTL_SELF_FRAG_RETURN_SEND(frag) \
|
||||
{ \
|
||||
opal_free_list_return (&mca_btl_self_component.self_frags_send, \
|
||||
(opal_free_list_item_t*)(frag)); \
|
||||
frag->segment.seg_addr.pval = frag+1; \
|
||||
}
|
||||
|
||||
#define MCA_BTL_SELF_FRAG_ALLOC_RDMA(frag) \
|
||||
{ \
|
||||
frag = (mca_btl_self_frag_t *) \
|
||||
opal_free_list_get (&mca_btl_self_component.self_frags_rdma); \
|
||||
}
|
||||
{ \
|
||||
frag = (mca_btl_self_frag_t *) \
|
||||
opal_free_list_get (&mca_btl_self_component.self_frags_rdma); \
|
||||
}
|
||||
|
||||
#define MCA_BTL_SELF_FRAG_RETURN_RDMA(frag) \
|
||||
{ \
|
||||
opal_free_list_return (&mca_btl_self_component.self_frags_rdma, \
|
||||
(opal_free_list_item_t*)(frag)); \
|
||||
frag->segment.seg_addr.pval = frag+1; \
|
||||
}
|
||||
#define MCA_BTL_SELF_FRAG_ALLOC_SEND(frag) \
|
||||
{ \
|
||||
frag = (mca_btl_self_frag_t *) \
|
||||
opal_free_list_get (&mca_btl_self_component.self_frags_send); \
|
||||
}
|
||||
|
||||
#endif
|
||||
#define MCA_BTL_SELF_FRAG_RETURN(frag) \
|
||||
{ \
|
||||
opal_free_list_return ((frag)->list, (opal_free_list_item_t*)(frag)); \
|
||||
}
|
||||
|
||||
#endif /* MCA_BTL_SELF_SEND_FRAG_H */
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user