Scalability patch, or slim-fast effect #1. All BML structures just
got a whole lot smaller, decreasing the memory footprint of the running application. How much is a good question. Here is a breakdown: - in mca_bml_base_endpoint_t: 3 * size_t + 1 * uint32_t - in mca_bml_base_btl_t: 1 * int + 1 * double - 1 * float + 6 * size_t + 9 * (void*) The decrease in mca_bml_base_endpoint_t is for each peer, and the decrease in mca_bml_base_btl_t is for each BTL for each peer. So, if we consider the most convenient case, where there is only one network between all peers, this decreases the memory footprint per peer by 9 * size_t + 9 * (void*) + 2 * int32_t + 1 * double - 1 * float. On a 64-bit machine this will be 156 bytes per peer. Now we access all these fields directly from the underlying BTL structure, and as this structure is common to multiple BML endpoints, we are a lot more cache friendly. Even if this does not improve the latency, it makes the SM performance graph a lot smoother. This commit was SVN r19659.
Этот коммит содержится в:
родитель
b32e4e7f34
Коммит
00d24bf8ab
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -28,8 +28,6 @@
|
||||
static void mca_bml_base_endpoint_construct(mca_bml_base_endpoint_t* ep)
|
||||
{
|
||||
ep->btl_pipeline_send_length = 0;
|
||||
ep->btl_max_send_size = 0;
|
||||
ep->btl_rdma_align = 0;
|
||||
ep->btl_send_limit = 0;
|
||||
|
||||
OBJ_CONSTRUCT(&ep->btl_eager, mca_bml_base_btl_array_t);
|
||||
|
@ -25,11 +25,6 @@ int mca_bml_base_output = -1;
|
||||
|
||||
mca_bml_base_module_t mca_bml = {
|
||||
NULL, /* bml_component */
|
||||
0, /* bml_eager_limit */
|
||||
0, /* bml_rndv_eager_limit */
|
||||
0, /* bml_max_send_size */
|
||||
0, /* bml_min_rdma_size */
|
||||
0, /* bml_max_rdma_size */
|
||||
NULL, /* bml_add_procs */
|
||||
NULL, /* bml_del_procs */
|
||||
NULL, /* bml_add_btl */
|
||||
|
@ -53,29 +53,10 @@ struct mca_mpool_base_resources_t;
|
||||
*/
|
||||
|
||||
struct mca_bml_base_btl_t {
|
||||
int btl_index; /**< index in endpoint array */
|
||||
int btl_flags; /**< support for put/get? */
|
||||
double btl_weight; /**< BTL weight for scheduling */
|
||||
size_t btl_eager_limit; /**< BTL eager limit */
|
||||
size_t btl_rndv_eager_limit; /**< BTL rndv eager limit */
|
||||
size_t btl_max_send_size; /**< BTL min send size */
|
||||
size_t btl_rdma_pipeline_send_length; /**< BTL send length */
|
||||
size_t btl_rdma_pipeline_frag_size; /**< BTL rdma frag size */
|
||||
size_t btl_min_rdma_pipeline_size; /**< BTL min rdma size */
|
||||
struct mca_btl_base_module_t *btl; /**< BTL module */
|
||||
struct mca_btl_base_endpoint_t* btl_endpoint; /**< BTL addressing info */
|
||||
|
||||
/* BTL function table */
|
||||
mca_btl_base_module_alloc_fn_t btl_alloc;
|
||||
mca_btl_base_module_free_fn_t btl_free;
|
||||
mca_btl_base_module_send_fn_t btl_send;
|
||||
mca_btl_base_module_sendi_fn_t btl_sendi;
|
||||
mca_btl_base_module_prepare_fn_t btl_prepare_src;
|
||||
mca_btl_base_module_prepare_fn_t btl_prepare_dst;
|
||||
mca_btl_base_module_put_fn_t btl_put;
|
||||
mca_btl_base_module_get_fn_t btl_get;
|
||||
|
||||
mca_mpool_base_module_t* btl_mpool;
|
||||
uint32_t btl_flags; /**< support for put/get? */
|
||||
float btl_weight; /**< BTL weight for scheduling */
|
||||
struct mca_btl_base_module_t *btl; /**< BTL module */
|
||||
struct mca_btl_base_endpoint_t* btl_endpoint; /**< BTL addressing info */
|
||||
};
|
||||
typedef struct mca_bml_base_btl_t mca_bml_base_btl_t;
|
||||
|
||||
@ -241,27 +222,31 @@ struct mca_bml_base_endpoint_t {
|
||||
size_t btl_pipeline_send_length; /**< max of pipeline send_length of available BTLs */
|
||||
size_t btl_send_limit; /**< max of min rdma pipeline for available rmda btls */
|
||||
size_t btl_max_send_size; /**< min of max send size for available send btls */
|
||||
size_t btl_rdma_align; /**< max of min rdma size for available rmda btls */
|
||||
mca_bml_base_btl_array_t btl_eager; /**< array of btls to use for first fragments */
|
||||
mca_bml_base_btl_array_t btl_send; /**< array of btls to use for remaining fragments */
|
||||
size_t bml_max_send_length;
|
||||
size_t bml_max_rdma_length;
|
||||
mca_bml_base_btl_array_t btl_rdma; /**< array of btls that support (prefer) rdma */
|
||||
size_t btl_rdma_index; /**< index of last used BTL for RDMA */
|
||||
uint32_t btl_flags_or; /**< the bitwise OR of the btl flags */
|
||||
uint32_t btl_flags_and; /**< the bitwise AND of the btl flags */
|
||||
size_t btl_rdma_index; /**< index of last used BTL for RDMA */
|
||||
uint32_t btl_flags_or; /**< the bitwise OR of the btl flags */
|
||||
};
|
||||
typedef struct mca_bml_base_endpoint_t mca_bml_base_endpoint_t;
|
||||
|
||||
|
||||
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_bml_base_endpoint_t);
|
||||
|
||||
static inline void mca_bml_base_alloc(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t** des, uint8_t order, size_t size, uint32_t flags) {
|
||||
*des = bml_btl->btl_alloc(bml_btl->btl, bml_btl->btl_endpoint, order, size, flags);
|
||||
static inline void mca_bml_base_alloc( mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t** des,
|
||||
uint8_t order, size_t size, uint32_t flags )
|
||||
{
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
*des = btl->btl_alloc(btl, bml_btl->btl_endpoint, order, size, flags);
|
||||
}
|
||||
|
||||
static inline void mca_bml_base_free(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des) {
|
||||
bml_btl->btl_free( bml_btl->btl, des );
|
||||
static inline void mca_bml_base_free( mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des )
|
||||
{
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
btl->btl_free( btl, des );
|
||||
/* The previous function is supposed to release the des object
|
||||
* so we should not touch it anymore.
|
||||
*/
|
||||
@ -269,22 +254,22 @@ static inline void mca_bml_base_free(mca_bml_base_btl_t* bml_btl, mca_btl_base_d
|
||||
|
||||
#if OMPI_ENABLE_DEBUG_RELIABILITY
|
||||
|
||||
int mca_bml_base_send(
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_tag_t tag);
|
||||
int mca_bml_base_send( mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_tag_t tag );
|
||||
|
||||
|
||||
#else
|
||||
|
||||
static inline int mca_bml_base_send(
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_tag_t tag)
|
||||
static inline int mca_bml_base_send( mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_tag_t tag )
|
||||
{
|
||||
int rc;
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
des->des_context = (void*) bml_btl;
|
||||
rc = bml_btl->btl_send(bml_btl->btl, bml_btl->btl_endpoint, des, tag);
|
||||
rc = btl->btl_send(btl, bml_btl->btl_endpoint, des, tag);
|
||||
if(rc == OMPI_ERR_RESOURCE_BUSY)
|
||||
rc = OMPI_SUCCESS;
|
||||
|
||||
@ -293,45 +278,48 @@ static inline int mca_bml_base_send(
|
||||
|
||||
#endif
|
||||
|
||||
static inline int mca_bml_base_send_status(
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_tag_t tag)
|
||||
static inline int mca_bml_base_send_status( mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_tag_t tag )
|
||||
{
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
des->des_context = (void*) bml_btl;
|
||||
return bml_btl->btl_send(bml_btl->btl, bml_btl->btl_endpoint, des, tag);
|
||||
return btl->btl_send(btl, bml_btl->btl_endpoint, des, tag);
|
||||
}
|
||||
|
||||
static inline int mca_bml_base_sendi(
|
||||
mca_bml_base_btl_t* bml_btl,
|
||||
struct ompi_convertor_t* convertor,
|
||||
void* header,
|
||||
size_t header_size,
|
||||
size_t payload_size,
|
||||
uint8_t order,
|
||||
uint32_t flags,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t** descriptor)
|
||||
static inline int mca_bml_base_sendi( mca_bml_base_btl_t* bml_btl,
|
||||
struct ompi_convertor_t* convertor,
|
||||
void* header,
|
||||
size_t header_size,
|
||||
size_t payload_size,
|
||||
uint8_t order,
|
||||
uint32_t flags,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t** descriptor )
|
||||
{
|
||||
return bml_btl->btl_sendi(bml_btl->btl, bml_btl->btl_endpoint,
|
||||
convertor, header, header_size,
|
||||
payload_size, order, flags, tag, descriptor);
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
return btl->btl_sendi(btl, bml_btl->btl_endpoint,
|
||||
convertor, header, header_size,
|
||||
payload_size, order, flags, tag, descriptor);
|
||||
}
|
||||
|
||||
static inline int mca_bml_base_put(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des) {
|
||||
static inline int mca_bml_base_put( mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des)
|
||||
{
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
des->des_context = (void*) bml_btl;
|
||||
return bml_btl->btl_put(
|
||||
bml_btl->btl,
|
||||
bml_btl->btl_endpoint,
|
||||
des);
|
||||
return btl->btl_put( btl, bml_btl->btl_endpoint, des );
|
||||
}
|
||||
|
||||
static inline int mca_bml_base_get(mca_bml_base_btl_t* bml_btl, mca_btl_base_descriptor_t* des) {
|
||||
static inline int mca_bml_base_get( mca_bml_base_btl_t* bml_btl,
|
||||
mca_btl_base_descriptor_t* des)
|
||||
{
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
des->des_context = (void*) bml_btl;
|
||||
return bml_btl->btl_get(
|
||||
bml_btl->btl,
|
||||
bml_btl->btl_endpoint,
|
||||
des);
|
||||
return btl->btl_get( btl, bml_btl->btl_endpoint, des );
|
||||
}
|
||||
|
||||
|
||||
@ -342,15 +330,12 @@ static inline void mca_bml_base_prepare_src(mca_bml_base_btl_t* bml_btl,
|
||||
size_t reserve,
|
||||
size_t *size,
|
||||
uint32_t flags,
|
||||
mca_btl_base_descriptor_t** des) {
|
||||
*des = bml_btl->btl_prepare_src( bml_btl->btl,
|
||||
bml_btl->btl_endpoint,
|
||||
reg,
|
||||
conv,
|
||||
order,
|
||||
reserve,
|
||||
size,
|
||||
flags );
|
||||
mca_btl_base_descriptor_t** des)
|
||||
{
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
*des = btl->btl_prepare_src( btl, bml_btl->btl_endpoint, reg, conv,
|
||||
order, reserve, size, flags );
|
||||
if( OPAL_LIKELY((*des) != NULL) ) {
|
||||
(*des)->des_context = (void*) bml_btl;
|
||||
}
|
||||
@ -363,15 +348,12 @@ static inline void mca_bml_base_prepare_dst(mca_bml_base_btl_t* bml_btl,
|
||||
size_t reserve,
|
||||
size_t *size,
|
||||
uint32_t flags,
|
||||
mca_btl_base_descriptor_t** des) {
|
||||
*des = bml_btl->btl_prepare_dst( bml_btl->btl,
|
||||
bml_btl->btl_endpoint,
|
||||
reg,
|
||||
conv,
|
||||
order,
|
||||
reserve,
|
||||
size,
|
||||
flags );
|
||||
mca_btl_base_descriptor_t** des)
|
||||
{
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
*des = btl->btl_prepare_dst( btl, bml_btl->btl_endpoint, reg, conv,
|
||||
order, reserve, size, flags );
|
||||
if( OPAL_LIKELY((*des) != NULL) ) {
|
||||
(*des)->des_context = (void*) bml_btl;
|
||||
}
|
||||
@ -570,12 +552,7 @@ typedef int (*mca_bml_base_module_ft_event_fn_t)(int status);
|
||||
struct mca_bml_base_module_t {
|
||||
/* BML common attributes */
|
||||
mca_bml_base_component_t* bml_component; /**< pointer back to the BML component structure */
|
||||
size_t bml_eager_limit; /**< maximum size of first fragment -- eager send */
|
||||
size_t bml_rndv_eager_limit; /**< size of a first fragment of rndv protocol */
|
||||
size_t bml_max_send_size; /**< maximum send fragment size supported by the BML */
|
||||
size_t bml_min_rdma_size; /**< threshold below which the BML should not fragment */
|
||||
size_t bml_max_rdma_size; /**< maximum rdma fragment size supported by the BML */
|
||||
|
||||
|
||||
/* BML function table */
|
||||
mca_bml_base_module_add_procs_fn_t bml_add_procs;
|
||||
mca_bml_base_module_del_procs_fn_t bml_del_procs;
|
||||
|
@ -237,7 +237,6 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
bml_endpoint->btl_proc = proc;
|
||||
proc->proc_bml = bml_endpoint;
|
||||
|
||||
bml_endpoint->btl_flags_and = 0;
|
||||
bml_endpoint->btl_flags_or = 0;
|
||||
}
|
||||
|
||||
@ -255,39 +254,21 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
/* cache the endpoint on the proc */
|
||||
bml_btl = mca_bml_base_btl_array_insert(&bml_endpoint->btl_send);
|
||||
bml_btl->btl = btl;
|
||||
bml_btl->btl_eager_limit = btl->btl_eager_limit;
|
||||
bml_btl->btl_rndv_eager_limit = btl->btl_rndv_eager_limit;
|
||||
bml_btl->btl_max_send_size = btl->btl_max_send_size;
|
||||
bml_btl->btl_rdma_pipeline_send_length =
|
||||
btl->btl_rdma_pipeline_send_length;
|
||||
bml_btl->btl_rdma_pipeline_frag_size =
|
||||
btl->btl_rdma_pipeline_frag_size;
|
||||
bml_btl->btl_min_rdma_pipeline_size =
|
||||
btl->btl_min_rdma_pipeline_size;
|
||||
bml_btl->btl_endpoint = btl_endpoints[p];
|
||||
bml_btl->btl_weight = 0;
|
||||
bml_btl->btl_alloc = btl->btl_alloc;
|
||||
bml_btl->btl_free = btl->btl_free;
|
||||
bml_btl->btl_prepare_src = btl->btl_prepare_src;
|
||||
bml_btl->btl_prepare_dst = btl->btl_prepare_dst;
|
||||
bml_btl->btl_send = btl->btl_send;
|
||||
bml_btl->btl_sendi = btl->btl_sendi;
|
||||
bml_btl->btl_flags = btl->btl_flags;
|
||||
bml_btl->btl_put = btl->btl_put;
|
||||
if( (bml_btl->btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == bml_btl->btl_put) ) {
|
||||
if( (bml_btl->btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put) ) {
|
||||
opal_output(0, "mca_bml_r2_add_procs: The PUT flag is specified for"
|
||||
" the %s BTL without any PUT function attached. Disard the flag !",
|
||||
bml_btl->btl->btl_component->btl_version.mca_component_name);
|
||||
bml_btl->btl_flags ^= MCA_BTL_FLAGS_PUT;
|
||||
}
|
||||
bml_btl->btl_get = btl->btl_get;
|
||||
if( (bml_btl->btl_flags & MCA_BTL_FLAGS_GET) && (NULL == bml_btl->btl_get) ) {
|
||||
if( (bml_btl->btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get) ) {
|
||||
opal_output(0, "mca_bml_r2_add_procs: The GET flag is specified for"
|
||||
" the %s BTL without any GET function attached. Disard the flag !",
|
||||
bml_btl->btl->btl_component->btl_version.mca_component_name);
|
||||
bml_btl->btl_flags ^= MCA_BTL_FLAGS_GET;
|
||||
}
|
||||
bml_btl->btl_mpool = btl->btl_mpool;
|
||||
if( (bml_btl->btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0 ) {
|
||||
/**
|
||||
* If no protocol specified, we have 2 choices: we ignore the BTL
|
||||
@ -297,10 +278,9 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
bml_btl->btl_flags |= MCA_BTL_FLAGS_SEND;
|
||||
}
|
||||
/**
|
||||
* calculate the bitwise OR and AND of the btl flags
|
||||
* calculate the bitwise OR of the btl flags
|
||||
*/
|
||||
bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
|
||||
bml_endpoint->btl_flags_and &= bml_btl->btl_flags;
|
||||
/* This BTL is in use, allow the progress registration */
|
||||
btl_inuse++;
|
||||
}
|
||||
@ -351,8 +331,6 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
qsort(bml_endpoint->btl_send.bml_btls, n_size,
|
||||
sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
|
||||
|
||||
bml_endpoint->bml_max_send_length = 0;
|
||||
bml_endpoint->bml_max_rdma_length = 0;
|
||||
bml_endpoint->btl_rdma_index = 0;
|
||||
for(n_index = 0; n_index < n_size; n_index++) {
|
||||
mca_bml_base_btl_t* bml_btl =
|
||||
@ -362,7 +340,6 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
if(btl->btl_latency < latency) {
|
||||
latency = btl->btl_latency;
|
||||
}
|
||||
bml_endpoint->bml_max_send_length += bml_btl->btl->btl_bandwidth;
|
||||
}
|
||||
|
||||
/* (1) set the weight of each btl as a percentage of overall bandwidth
|
||||
@ -399,14 +376,14 @@ static int mca_bml_r2_add_procs( size_t nprocs,
|
||||
!((proc->proc_arch != ompi_proc_local_proc->proc_arch) &&
|
||||
(0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
|
||||
mca_bml_base_btl_t* bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
|
||||
mca_btl_base_module_t* btl_rdma = bml_btl->btl;
|
||||
|
||||
*bml_btl_rdma = *bml_btl;
|
||||
if(bml_endpoint->btl_pipeline_send_length <
|
||||
bml_btl_rdma->btl_rdma_pipeline_send_length) {
|
||||
bml_endpoint->btl_pipeline_send_length =
|
||||
bml_btl_rdma->btl_rdma_pipeline_send_length;
|
||||
if(bml_endpoint->btl_pipeline_send_length < btl_rdma->btl_rdma_pipeline_send_length) {
|
||||
bml_endpoint->btl_pipeline_send_length = btl_rdma->btl_rdma_pipeline_send_length;
|
||||
}
|
||||
if(bml_endpoint->btl_send_limit < bml_btl_rdma->btl_min_rdma_pipeline_size) {
|
||||
bml_endpoint->btl_send_limit = bml_btl_rdma->btl_min_rdma_pipeline_size;
|
||||
if(bml_endpoint->btl_send_limit < btl_rdma->btl_min_rdma_pipeline_size) {
|
||||
bml_endpoint->btl_send_limit = btl_rdma->btl_min_rdma_pipeline_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -542,6 +519,8 @@ static inline int bml_r2_remove_btl_progress(mca_btl_base_module_t* btl)
|
||||
static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl)
|
||||
{
|
||||
mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->proc_bml;
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
mca_btl_base_module_t* ep_btl;
|
||||
double total_bandwidth = 0;
|
||||
size_t b;
|
||||
|
||||
@ -558,18 +537,22 @@ static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl
|
||||
reset max_send_size to the min of all btl's */
|
||||
total_bandwidth = 0;
|
||||
for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_send); b++) {
|
||||
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b);
|
||||
total_bandwidth += bml_btl->btl->btl_bandwidth;
|
||||
if (bml_btl->btl_max_send_size < ep->btl_max_send_size) {
|
||||
ep->btl_max_send_size = bml_btl->btl->btl_max_send_size;
|
||||
bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b);
|
||||
ep_btl = bml_btl->btl;
|
||||
|
||||
total_bandwidth += ep_btl->btl_bandwidth;
|
||||
if (ep_btl->btl_max_send_size < ep->btl_max_send_size) {
|
||||
ep->btl_max_send_size = ep_btl->btl_max_send_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* compute weighting factor for this btl */
|
||||
for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_send); b++) {
|
||||
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b);
|
||||
if(bml_btl->btl->btl_bandwidth > 0) {
|
||||
bml_btl->btl_weight = bml_btl->btl->btl_bandwidth / total_bandwidth;
|
||||
bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b);
|
||||
ep_btl = bml_btl->btl;
|
||||
|
||||
if(ep_btl->btl_bandwidth > 0) {
|
||||
bml_btl->btl_weight = ep_btl->btl_bandwidth / total_bandwidth;
|
||||
} else {
|
||||
bml_btl->btl_weight = 1.0 / mca_bml_base_btl_array_get_size(&ep->btl_send);
|
||||
}
|
||||
@ -582,24 +565,26 @@ static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl
|
||||
/* computer total bandwidth */
|
||||
total_bandwidth = 0;
|
||||
for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_rdma); b++) {
|
||||
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b);
|
||||
bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b);
|
||||
ep_btl = bml_btl->btl;
|
||||
|
||||
/* update aggregate endpoint info */
|
||||
total_bandwidth += bml_btl->btl->btl_bandwidth;
|
||||
if (ep->btl_pipeline_send_length <
|
||||
bml_btl->btl_rdma_pipeline_send_length) {
|
||||
ep->btl_pipeline_send_length =
|
||||
bml_btl->btl_rdma_pipeline_send_length;
|
||||
total_bandwidth += ep_btl->btl_bandwidth;
|
||||
if (ep->btl_pipeline_send_length < ep_btl->btl_rdma_pipeline_send_length) {
|
||||
ep->btl_pipeline_send_length = ep_btl->btl_rdma_pipeline_send_length;
|
||||
}
|
||||
if (ep->btl_send_limit < bml_btl->btl_min_rdma_pipeline_size) {
|
||||
ep->btl_send_limit = bml_btl->btl_min_rdma_pipeline_size;
|
||||
if (ep->btl_send_limit < ep_btl->btl_min_rdma_pipeline_size) {
|
||||
ep->btl_send_limit = ep_btl->btl_min_rdma_pipeline_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* compute weighting factor for this btl */
|
||||
for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_rdma); b++) {
|
||||
mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b);
|
||||
if(bml_btl->btl->btl_bandwidth > 0) {
|
||||
bml_btl->btl_weight = bml_btl->btl->btl_bandwidth / total_bandwidth;
|
||||
bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b);
|
||||
ep_btl = bml_btl->btl;
|
||||
|
||||
if(ep_btl->btl_bandwidth > 0) {
|
||||
bml_btl->btl_weight = ep_btl->btl_bandwidth / total_bandwidth;
|
||||
} else {
|
||||
bml_btl->btl_weight = 1.0 / mca_bml_base_btl_array_get_size(&ep->btl_rdma);
|
||||
}
|
||||
@ -803,11 +788,6 @@ int mca_bml_r2_component_fini(void)
|
||||
mca_bml_r2_module_t mca_bml_r2 = {
|
||||
{
|
||||
&mca_bml_r2_component,
|
||||
0, /* eager limit */
|
||||
0, /* min send size */
|
||||
0, /* max send size */
|
||||
0, /* min rdma size */
|
||||
0, /* max rdma size */
|
||||
mca_bml_r2_add_procs,
|
||||
mca_bml_r2_del_procs,
|
||||
mca_bml_r2_add_btl,
|
||||
|
@ -118,10 +118,10 @@ ompi_osc_rdma_peer_info_free(ompi_osc_rdma_peer_info_t *peer_info)
|
||||
if (NULL != peer_info->local_descriptors) {
|
||||
for (i = 0 ; i < peer_info->local_num_btls ; ++i) {
|
||||
if (NULL != peer_info->local_descriptors[i]) {
|
||||
mca_bml_base_btl_t *bml_btl =
|
||||
peer_info->local_btls[i];
|
||||
bml_btl->btl_free(bml_btl->btl,
|
||||
peer_info->local_descriptors[i]);
|
||||
mca_bml_base_btl_t *bml_btl = peer_info->local_btls[i];
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
|
||||
btl->btl_free(btl, peer_info->local_descriptors[i]);
|
||||
}
|
||||
}
|
||||
free(peer_info->local_descriptors);
|
||||
|
@ -1220,7 +1220,7 @@ setup_rdma(ompi_osc_rdma_module_t *module)
|
||||
++j) {
|
||||
mca_bml_base_btl_t *bml_btl =
|
||||
mca_bml_base_btl_array_get_index(&endpoint->btl_rdma, j);
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl_mpool;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;
|
||||
int index = peer_info->local_num_btls;
|
||||
|
||||
if (!is_valid_rdma(bml_btl)) continue;
|
||||
|
@ -157,8 +157,9 @@ ompi_osc_rdma_sendreq_rdma(ompi_osc_rdma_module_t *module,
|
||||
ompi_osc_rdma_sendreq_t *sendreq)
|
||||
{
|
||||
mca_btl_base_descriptor_t* descriptor;
|
||||
size_t size = sendreq->req_origin_bytes_packed;
|
||||
ompi_osc_rdma_btl_t *rdma_btl = NULL;
|
||||
mca_btl_base_module_t* btl;
|
||||
size_t size = sendreq->req_origin_bytes_packed;
|
||||
int index, target, ret;
|
||||
|
||||
target = sendreq->req_target_rank;
|
||||
@ -172,6 +173,7 @@ ompi_osc_rdma_sendreq_rdma(ompi_osc_rdma_module_t *module,
|
||||
}
|
||||
|
||||
rdma_btl = &(module->m_peer_info[target].peer_btls[index]);
|
||||
btl = rdma_btl->bml_btl->btl;
|
||||
|
||||
if (sendreq->req_type == OMPI_OSC_RDMA_PUT) {
|
||||
mca_bml_base_prepare_src(rdma_btl->bml_btl, NULL,
|
||||
@ -199,10 +201,8 @@ ompi_osc_rdma_sendreq_rdma(ompi_osc_rdma_module_t *module,
|
||||
descriptor->des_cbdata = sendreq;
|
||||
descriptor->des_cbfunc = rdma_cb;
|
||||
|
||||
ret = rdma_btl->bml_btl->
|
||||
btl_put(rdma_btl->bml_btl->btl,
|
||||
rdma_btl->bml_btl->btl_endpoint,
|
||||
descriptor);
|
||||
ret = btl->btl_put(btl, rdma_btl->bml_btl->btl_endpoint,
|
||||
descriptor);
|
||||
} else {
|
||||
mca_bml_base_prepare_dst(rdma_btl->bml_btl,
|
||||
NULL, &sendreq->req_origin_convertor, rdma_btl->rdma_order,
|
||||
@ -223,10 +223,8 @@ ompi_osc_rdma_sendreq_rdma(ompi_osc_rdma_module_t *module,
|
||||
descriptor->des_cbdata = sendreq;
|
||||
descriptor->des_cbfunc = rdma_cb;
|
||||
|
||||
ret = rdma_btl->bml_btl->
|
||||
btl_get(rdma_btl->bml_btl->btl,
|
||||
rdma_btl->bml_btl->btl_endpoint,
|
||||
descriptor);
|
||||
ret = btl->btl_get(btl, rdma_btl->bml_btl->btl_endpoint,
|
||||
descriptor);
|
||||
}
|
||||
rdma_btl->rdma_order = descriptor->order;
|
||||
|
||||
@ -403,6 +401,7 @@ ompi_osc_rdma_sendreq_send(ompi_osc_rdma_module_t *module,
|
||||
int ret = OMPI_SUCCESS;
|
||||
mca_bml_base_endpoint_t *endpoint = NULL;
|
||||
mca_bml_base_btl_t *bml_btl = NULL;
|
||||
mca_btl_base_module_t* btl = NULL;
|
||||
mca_btl_base_descriptor_t *descriptor = NULL;
|
||||
ompi_osc_rdma_send_header_t *header = NULL;
|
||||
size_t written_data = 0;
|
||||
@ -445,10 +444,11 @@ ompi_osc_rdma_sendreq_send(ompi_osc_rdma_module_t *module,
|
||||
/* get a buffer... */
|
||||
endpoint = (mca_bml_base_endpoint_t*) sendreq->req_target_proc->proc_bml;
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
btl = bml_btl->btl;
|
||||
mca_bml_base_alloc(bml_btl, &descriptor, MCA_BTL_NO_ORDER,
|
||||
module->m_use_buffers ? bml_btl->btl_eager_limit :
|
||||
needed_len < bml_btl->btl_eager_limit ? needed_len :
|
||||
bml_btl->btl_eager_limit, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
module->m_use_buffers ? btl->btl_eager_limit :
|
||||
needed_len < btl->btl_eager_limit ? needed_len :
|
||||
btl->btl_eager_limit, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
if (NULL == descriptor) {
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto cleanup;
|
||||
@ -690,7 +690,7 @@ ompi_osc_rdma_replyreq_send(ompi_osc_rdma_module_t *module,
|
||||
endpoint = (mca_bml_base_endpoint_t*) replyreq->rep_origin_proc->proc_bml;
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
mca_bml_base_alloc(bml_btl, &descriptor, MCA_BTL_NO_ORDER,
|
||||
bml_btl->btl_eager_limit, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
bml_btl->btl->btl_eager_limit, MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_SEND_ALWAYS_CALLBACK);
|
||||
if (NULL == descriptor) {
|
||||
ret = OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
goto cleanup;
|
||||
|
@ -176,7 +176,7 @@ do {
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32(&proc->send_sequence,1); \
|
||||
\
|
||||
/* select a btl */ \
|
||||
eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_dr_hdr_t); \
|
||||
eager_limit = bml_btl->btl->btl_eager_limit - sizeof(mca_pml_dr_hdr_t); \
|
||||
if(size <= eager_limit) { \
|
||||
switch(sendreq->req_send.req_send_mode) { \
|
||||
case MCA_PML_BASE_SEND_SYNCHRONOUS: \
|
||||
|
@ -299,7 +299,7 @@ mca_pml_ob1_calc_weighted_length( mca_pml_ob1_com_btl_t *btls, int num_btls, siz
|
||||
mca_bml_base_btl_t* bml_btl = btls[i].bml_btl;
|
||||
size_t length = 0;
|
||||
if( OPAL_UNLIKELY(0 != length_left) ) {
|
||||
length = (length_left > bml_btl->btl_eager_limit)?
|
||||
length = (length_left > bml_btl->btl->btl_eager_limit)?
|
||||
((size_t)(size * (bml_btl->btl_weight / weight_total))) :
|
||||
length_left;
|
||||
|
||||
|
@ -62,7 +62,7 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma,
|
||||
(bml_endpoint->btl_rdma_index + n) % num_btls);
|
||||
mca_mpool_base_registration_t* reg = NULL;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl_mpool;
|
||||
mca_mpool_base_module_t *btl_mpool = bml_btl->btl->btl_mpool;
|
||||
|
||||
if(NULL != btl_mpool) {
|
||||
if(!mca_pml_ob1.leave_pinned) {
|
||||
@ -101,10 +101,9 @@ size_t mca_pml_ob1_rdma_btls(
|
||||
return num_btls_used;
|
||||
}
|
||||
|
||||
size_t mca_pml_ob1_rdma_pipeline_btls(
|
||||
mca_bml_base_endpoint_t* bml_endpoint,
|
||||
size_t size,
|
||||
mca_pml_ob1_com_btl_t* rdma_btls)
|
||||
size_t mca_pml_ob1_rdma_pipeline_btls( mca_bml_base_endpoint_t* bml_endpoint,
|
||||
size_t size,
|
||||
mca_pml_ob1_com_btl_t* rdma_btls )
|
||||
{
|
||||
int i, num_btls = mca_bml_base_btl_array_get_size(&bml_endpoint->btl_rdma);
|
||||
double weight_total = 0;
|
||||
@ -112,7 +111,7 @@ size_t mca_pml_ob1_rdma_pipeline_btls(
|
||||
for(i = 0; i < num_btls && i < mca_pml_ob1.max_rdma_per_request; i++) {
|
||||
rdma_btls[i].bml_btl =
|
||||
mca_bml_base_btl_array_get_next(&bml_endpoint->btl_rdma);
|
||||
if(rdma_btls[i].bml_btl->btl_mpool != NULL)
|
||||
if(NULL != rdma_btls[i].bml_btl->btl->btl_mpool)
|
||||
rdma_btls[i].btl_reg = NULL;
|
||||
else
|
||||
rdma_btls[i].btl_reg = &pml_ob1_dummy_reg;
|
||||
|
@ -687,9 +687,8 @@ void mca_pml_ob1_recv_request_matched_probe( mca_pml_ob1_recv_request_t* recvreq
|
||||
*
|
||||
*/
|
||||
|
||||
int mca_pml_ob1_recv_request_schedule_once(
|
||||
mca_pml_ob1_recv_request_t* recvreq,
|
||||
mca_bml_base_btl_t *start_bml_btl)
|
||||
int mca_pml_ob1_recv_request_schedule_once( mca_pml_ob1_recv_request_t* recvreq,
|
||||
mca_bml_base_btl_t *start_bml_btl )
|
||||
{
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
int num_tries = recvreq->req_rdma_cnt, num_fail = 0;
|
||||
@ -717,6 +716,7 @@ int mca_pml_ob1_recv_request_schedule_once(
|
||||
mca_btl_base_descriptor_t* dst;
|
||||
mca_btl_base_descriptor_t* ctl;
|
||||
mca_mpool_base_registration_t * reg = NULL;
|
||||
mca_btl_base_module_t* btl;
|
||||
int rc, rdma_idx;
|
||||
|
||||
if(prev_bytes_remaining == bytes_remaining) {
|
||||
@ -743,21 +743,20 @@ int mca_pml_ob1_recv_request_schedule_once(
|
||||
if(++recvreq->req_rdma_idx >= recvreq->req_rdma_cnt)
|
||||
recvreq->req_rdma_idx = 0;
|
||||
} while(!size);
|
||||
btl = bml_btl->btl;
|
||||
|
||||
/* makes sure that we don't exceed BTL max rdma size
|
||||
* if memory is not pinned already */
|
||||
if(NULL == reg &&
|
||||
bml_btl->btl_rdma_pipeline_frag_size != 0 &&
|
||||
size > bml_btl->btl_rdma_pipeline_frag_size) {
|
||||
size = bml_btl->btl_rdma_pipeline_frag_size;
|
||||
if( (NULL == reg) && (btl->btl_rdma_pipeline_frag_size != 0) &&
|
||||
(size > btl->btl_rdma_pipeline_frag_size)) {
|
||||
size = btl->btl_rdma_pipeline_frag_size;
|
||||
}
|
||||
|
||||
/* take lock to protect converter against concurrent access
|
||||
* from unpack */
|
||||
OPAL_THREAD_LOCK(&recvreq->lock);
|
||||
ompi_convertor_set_position(
|
||||
&recvreq->req_recv.req_base.req_convertor,
|
||||
&recvreq->req_rdma_offset);
|
||||
ompi_convertor_set_position( &recvreq->req_recv.req_base.req_convertor,
|
||||
&recvreq->req_rdma_offset );
|
||||
|
||||
/* prepare a descriptor for RDMA */
|
||||
mca_bml_base_prepare_dst(bml_btl, reg,
|
||||
@ -806,8 +805,7 @@ int mca_pml_ob1_recv_request_schedule_once(
|
||||
|
||||
if(!recvreq->req_ack_sent)
|
||||
recvreq->req_ack_sent = true;
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_PUT,
|
||||
recvreq->req_recv.req_base.req_proc);
|
||||
ob1_hdr_hton(hdr, MCA_PML_OB1_HDR_TYPE_PUT, recvreq->req_recv.req_base.req_proc);
|
||||
|
||||
PERUSE_TRACE_COMM_OMPI_EVENT( PERUSE_COMM_REQ_XFER_CONTINUE,
|
||||
&(recvreq->req_recv.req_base), size,
|
||||
|
@ -470,7 +470,7 @@ int mca_pml_ob1_send_request_start_copy( mca_pml_ob1_send_request_t* sendreq,
|
||||
size_t max_data = size;
|
||||
int rc;
|
||||
|
||||
if(NULL != bml_btl->btl_sendi) {
|
||||
if(NULL != bml_btl->btl->btl_sendi) {
|
||||
mca_pml_ob1_match_hdr_t match;
|
||||
match.hdr_common.hdr_flags = 0;
|
||||
match.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH;
|
||||
@ -1020,9 +1020,8 @@ cannot_pack:
|
||||
size = range->range_btls[btl_idx].length;
|
||||
|
||||
/* makes sure that we don't exceed BTL max send size */
|
||||
if(bml_btl->btl_max_send_size != 0)
|
||||
{
|
||||
size_t max_send_size = bml_btl->btl_max_send_size -
|
||||
if(bml_btl->btl->btl_max_send_size != 0) {
|
||||
size_t max_send_size = bml_btl->btl->btl_max_send_size -
|
||||
sizeof(mca_pml_ob1_frag_hdr_t);
|
||||
|
||||
if (size > max_send_size) {
|
||||
|
@ -341,7 +341,8 @@ mca_pml_ob1_send_request_start_btl( mca_pml_ob1_send_request_t* sendreq,
|
||||
mca_bml_base_btl_t* bml_btl )
|
||||
{
|
||||
size_t size = sendreq->req_send.req_bytes_packed;
|
||||
size_t eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);
|
||||
mca_btl_base_module_t* btl = bml_btl->btl;
|
||||
size_t eager_limit = btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t);
|
||||
int rc;
|
||||
|
||||
if( OPAL_LIKELY(size <= eager_limit) ) {
|
||||
@ -365,8 +366,8 @@ mca_pml_ob1_send_request_start_btl( mca_pml_ob1_send_request_t* sendreq,
|
||||
}
|
||||
} else {
|
||||
size = eager_limit;
|
||||
if(OPAL_UNLIKELY(bml_btl->btl_rndv_eager_limit < eager_limit))
|
||||
size = bml_btl->btl_rndv_eager_limit;
|
||||
if(OPAL_UNLIKELY(btl->btl_rndv_eager_limit < eager_limit))
|
||||
size = btl->btl_rndv_eager_limit;
|
||||
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) {
|
||||
rc = mca_pml_ob1_send_request_start_buffered(sendreq, bml_btl, size);
|
||||
} else if
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user