9d92075e60
This commit rewrites much of the btl/self component to fix a long-standing memory usage bug. Before this commit the prepare_src path would always allocate a max send fragment (256kB). This caused the rank to allocate 32 * 256k useless buffers from one send. This commit makes the following changes: - Add the MCA_BTL_FLAGS_GET flag by default. No reason not to set it. - Reduce the eager limit, max send size, buffers per allocation, and maximum buffer count per fragment size. These changes should have no noticeable effect on performance but should greatly reduce the memory usage of the component. - Implement the sendi function. This should reduce self send latency somewhat. - Rewrite prepare_src to never allocate an eager or max send fragment for contiguous data. - add_procs needs to return something in the peer array for the proc self, not just set the reachability bit. Now stores (void *) 1. - Various cleanups. Removed an unused file. Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
69 строки
2.4 KiB
C
69 строки
2.4 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "opal_config.h"
|
|
#include "btl_self_frag.h"
|
|
|
|
/**
 * Common initialization shared by the eager, send and rdma fragment
 * constructors in this file.
 *
 * Makes the base descriptor expose exactly one segment, segments[0],
 * whose address is frag->data and whose length is frag->size (narrowed
 * to uint32_t), and clears the descriptor flags.  frag->size is read
 * here, so the caller must have set it beforehand.
 *
 * @param frag  fragment being constructed
 */
static inline void mca_btl_self_frag_constructor(mca_btl_self_frag_t* frag)
{
    /* describe the single segment carried by this fragment */
    frag->segments[0].seg_len       = (uint32_t) frag->size;
    frag->segments[0].seg_addr.pval = (void *) frag->data;

    /* hook that segment into the base descriptor and reset its flags */
    frag->base.des_segment_count = 1;
    frag->base.des_segments      = frag->segments;
    frag->base.des_flags         = 0;
}
|
|
|
|
/**
 * Constructor for eager fragments.
 *
 * Sets the fragment size to mca_btl_self.btl_eager_limit, records the
 * component's self_frags_eager list in frag->list, and then runs the
 * common fragment initialization.
 *
 * @param frag  fragment being constructed
 */
static void mca_btl_self_frag_eager_constructor(mca_btl_self_frag_t* frag)
{
    /* the size must be in place before the common constructor reads it */
    frag->size = mca_btl_self.btl_eager_limit;
    frag->list = &mca_btl_self_component.self_frags_eager;

    mca_btl_self_frag_constructor(frag);
}
|
|
|
|
/**
 * Constructor for send fragments.
 *
 * Sets the fragment size to mca_btl_self.btl_max_send_size, records the
 * component's self_frags_send list in frag->list, and then runs the
 * common fragment initialization.
 *
 * @param frag  fragment being constructed
 */
static void mca_btl_self_frag_send_constructor(mca_btl_self_frag_t* frag)
{
    /* the size must be in place before the common constructor reads it */
    frag->size = mca_btl_self.btl_max_send_size;
    frag->list = &mca_btl_self_component.self_frags_send;

    mca_btl_self_frag_constructor(frag);
}
|
|
|
|
/**
 * Constructor for rdma fragments.
 *
 * Sets the fragment size to MCA_BTL_SELF_MAX_INLINE_SIZE, records the
 * component's self_frags_rdma list in frag->list, and then runs the
 * common fragment initialization.
 *
 * @param frag  fragment being constructed
 */
static void mca_btl_self_frag_rdma_constructor(mca_btl_self_frag_t* frag)
{
    /* the size must be in place before the common constructor reads it */
    frag->size = MCA_BTL_SELF_MAX_INLINE_SIZE;
    frag->list = &mca_btl_self_component.self_frags_rdma;

    mca_btl_self_frag_constructor(frag);
}
|
|
|
|
OBJ_CLASS_INSTANCE( mca_btl_self_frag_eager_t,
|
|
mca_btl_base_descriptor_t,
|
|
mca_btl_self_frag_eager_constructor,
|
|
NULL );
|
|
|
|
OBJ_CLASS_INSTANCE( mca_btl_self_frag_send_t,
|
|
mca_btl_base_descriptor_t,
|
|
mca_btl_self_frag_send_constructor,
|
|
NULL );
|
|
|
|
OBJ_CLASS_INSTANCE( mca_btl_self_frag_rdma_t,
|
|
mca_btl_base_descriptor_t,
|
|
mca_btl_self_frag_rdma_constructor,
|
|
NULL );
|