1
1
openmpi/ompi/mca/bml/base/bml_base_init.c
George Bosilca 00d24bf8ab Scalability patch, or slim-fast effect #1. All BML structures just
got a whole lot smaller, decreasing the memory footprint of the
running application. How much is a good question. Here is a
breakdown:

- in mca_bml_base_endpoint_t: 3 *size_t + 1 * uint32_t
- in mca_bml_base_btl_t: 1 * int + 1 * double - 1 * float
                         + 6 * size_t + 9 * (void*)

The decrease in mca_bml_base_endpoint_t is for each peer and the
decrease in mca_bml_base_btl_t is for each BTL for each peer.
So, if we consider the most convenient case where there is only
one network between all peers, this decreases the memory footprint
per peer by
9*size_t + 9*(void*) + 2 * int32_t + 1 * double - 1 * float.
On a 64-bit machine this will be 156 bytes per peer.

Now we access all these fields directly from the underlying BTL
structure, and as this structure is common to multiple BML endpoints,
we are a lot more cache friendly. Even if this does not improve the
latency, it makes the SM performance graph a lot smoother.

This commit was SVN r19659.
2008-09-30 21:02:37 +00:00

94 строки
3.3 KiB
C

/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/bml/base/base.h"
#include "opal/mca/base/base.h"
#include "opal/mca/mca.h"
/*
 * Output handle that mca_bml_base_init() passes to opal_output_verbose()
 * and mca_base_components_close(). It starts at -1; presumably that means
 * "no output stream opened yet" -- TODO confirm where it gets assigned.
 */
int mca_bml_base_output = -1;
/*
 * The BML module currently in use. Every entry starts out NULL;
 * mca_bml_base_init() overwrites the whole structure with a copy of the
 * module it selects. If no module is selected, the entries stay NULL.
 */
mca_bml_base_module_t mca_bml = {
NULL, /* bml_component */
NULL, /* bml_add_procs */
NULL, /* bml_del_procs */
NULL, /* bml_add_btl */
NULL, /* bml_del_btl */
NULL, /* bml_del_proc_btl */
NULL, /* bml_register */
NULL, /* bml_register_error */
NULL, /* bml_finalize */
NULL /* FT event */
};
/* Copy of the component chosen by mca_bml_base_init(); it is only assigned
 * there when a module was actually selected. */
mca_bml_base_component_t mca_bml_component;
/* Set to true on entry to mca_bml_base_init() and reported by
 * mca_bml_base_inited(); nothing in this file resets it. */
static bool init_called = false;
/**
 * Report whether mca_bml_base_init() has been entered.
 *
 * The flag is raised at the very start of mca_bml_base_init(), before any
 * component is examined, so a true result does not imply that a BML module
 * was actually selected.
 *
 * @return true once mca_bml_base_init() has been called, false before that.
 */
bool mca_bml_base_inited(void)
{
    const bool was_called = init_called;

    return was_called;
}
/**
 * Pick the BML component to use and install it as the active one.
 *
 * Every entry of mca_bml_base_components_available is visited in list
 * order. Components without a bml_init hook are skipped (with a verbose
 * message); the others have bml_init invoked, and the module that reports
 * the strictly highest priority is remembered. On equal priorities the
 * earlier component in the list wins.
 *
 * When a module was found, the winning component and module are copied
 * into the globals mca_bml_component and mca_bml, and
 * mca_base_components_close() is called with the winner as its last
 * argument (presumably so that it is the one component left open --
 * TODO confirm against the MCA base implementation).
 *
 * @param enable_progress_threads  Passed through unchanged to each
 *                                 component's bml_init hook.
 * @param enable_mpi_threads       Passed through unchanged to each
 *                                 component's bml_init hook.
 *
 * @return OMPI_SUCCESS if no component produced a module (the globals are
 *         left untouched in that case); otherwise whatever
 *         mca_base_components_close() returns.
 */
int mca_bml_base_init( bool enable_progress_threads,
                       bool enable_mpi_threads)
{
    opal_list_item_t *cur = NULL;
    mca_bml_base_component_t *winner_component = NULL;
    mca_bml_base_module_t *winner_module = NULL;
    int reported_priority = 0;
    int winner_priority = -1;

    /* Raised before any selection work, so mca_bml_base_inited() answers
     * true from here on regardless of how the selection turns out. */
    init_called = true;

    for (cur = opal_list_get_first(&mca_bml_base_components_available);
         cur != opal_list_get_end(&mca_bml_base_components_available);
         cur = opal_list_get_next(cur)) {
        mca_base_component_list_item_t *entry =
            (mca_base_component_list_item_t *) cur;
        mca_bml_base_component_t *candidate =
            (mca_bml_base_component_t *) entry->cli_component;
        mca_bml_base_module_t *candidate_module = NULL;

        /* A component that offers no init hook cannot be selected. */
        if (NULL == candidate->bml_init) {
            opal_output_verbose(10, mca_bml_base_output,
                                "select: no init function; ignoring component %s",
                                candidate->bml_version.mca_component_name);
            continue;
        }

        candidate_module = candidate->bml_init(&reported_priority,
                                               enable_progress_threads,
                                               enable_mpi_threads);

        /* A NULL module means the component declined to run; otherwise
         * keep it only if it strictly beats the best priority so far. */
        if (NULL != candidate_module && reported_priority > winner_priority) {
            winner_priority = reported_priority;
            winner_component = candidate;
            winner_module = candidate_module;
        }
    }

    /* Nothing usable was found: still reported as success. */
    if (NULL == winner_module) {
        return OMPI_SUCCESS;
    }

    /* Publish copies of the winner through the framework globals. */
    mca_bml_component = *winner_component;
    mca_bml = *winner_module;

    return mca_base_components_close(mca_bml_base_output,
                                     &mca_bml_base_components_available,
                                     (mca_base_component_t *) winner_component);
}