openmpi/opal/mca/mpool/base/mpool_base_tree.c
Ralph Castain 33ab928e1b ompi_proc_t size reduction: part 1
We currently save the hostname of a proc when we create the ompi_proc_t for it. This was originally done because the only method we had for discovering the host of a proc was to include that info in the modex, and we therefore had to store it somewhere proc-local. Obviously, this carried a memory penalty for storing all those strings, so we added a "cutoff" parameter so that we wouldn't collect hostnames above a certain number of procs.

Unfortunately, this still results in an 8-byte/proc memory cost as we have a char* pointer in the opal_proc_t that is contained in the ompi_proc_t so that we can store the hostname of the other procs if we fall below the cutoff. At scale, this can consume a fair amount of memory.

With the switch to relying on PMIx, there is no longer a need to cache the proc hostnames. Using the "optional" feature of PMIx_Get, we restrict the retrieval to be purely proc-local - i.e., we retrieve the info either via shared memory or from within the proc-internal hash storage (depending upon the active PMIx components). Thus, the retrieval of a hostname is purely a local operation involving no communication.
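
As a rough illustration (not the actual OMPI code; the nspace/rank identifiers and error handling below are simplified assumptions), the local-only lookup amounts to asking PMIx for PMIX_HOSTNAME with the PMIX_OPTIONAL directive set, which tells PMIx not to go out to the server if the value isn't already available locally:

    pmix_proc_t peer;
    pmix_info_t info;
    pmix_value_t *val = NULL;
    bool optional = true;
    char *hostname = NULL;

    PMIX_LOAD_PROCID(&peer, nspace, rank);                       /* the peer whose host we want */
    PMIX_INFO_LOAD(&info, PMIX_OPTIONAL, &optional, PMIX_BOOL);  /* stay local - no server query */

    if (PMIX_SUCCESS == PMIx_Get(&peer, PMIX_HOSTNAME, &info, 1, &val)) {
        hostname = strdup(val->data.string);   /* take a copy the caller owns */
        PMIX_VALUE_RELEASE(val);
    }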

All RMs are required to provide a complete hostname map of all procs at startup. Thus, we have full access to all hostnames without including them in a modex or having to cache them on each proc. This allows us to remove the char* pointer from the opal_proc_t, saving us 8 bytes/proc.

Unfortunately, PMIx_Get does not currently support the return of a static pointer to memory. Thus, even though PMIx has the hostname in its memory, it can only return a malloc'd version of it. I have therefore ensured that the return from opal_get_proc_hostname is consistently malloc'd and free'd wherever used. This shouldn't be a burden as the hostname is only used in one of two circumstances:

(a) in an error message
(b) in a verbose output for debugging purposes

Thus, there should be no performance penalty associated with the malloc/free requirement. PMIx will eventually be returning static pointers, and so we can eventually simplify this method and return a "const char*" - but as noted, this really isn't an issue even today.
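
For illustration, a hedged sketch of the resulting caller-side pattern (the exact signature of opal_get_proc_hostname and the surrounding error path are assumptions here): the returned string is heap-allocated, and the caller releases it once the message has been emitted.

    char *host = opal_get_proc_hostname(proc);   /* returns a malloc'd copy (assumed signature) */
    if (NULL != host) {
        opal_output(0, "communication failure with peer on node %s", host);
        free(host);                              /* caller owns the string */
    }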

Signed-off-by: Ralph Castain <rhc@pmix.org>
2020-03-23 12:49:44 -07:00

243 lines
7.0 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007 Voltaire. All rights reserved.
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2020 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/mca/mca.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/util/printf.h"
#include "opal/class/opal_rb_tree.h"
#include "mpool_base_tree.h"
static int num_leaks = 0;
static int max_mem_leaks = -1;
static char *leak_msg = NULL;

static int condition(void *value);
static void action(void *key, void *value);

static void opal_mca_mpool_base_tree_constructor(mca_mpool_base_tree_item_t *item)
{
    item->key = NULL;
}

OBJ_CLASS_INSTANCE(mca_mpool_base_tree_item_t, opal_free_list_item_t,
                   opal_mca_mpool_base_tree_constructor, NULL);
/*
 * use globals for the tree and the tree_item free list..
 */
opal_rb_tree_t mca_mpool_base_tree = {{0}};
opal_free_list_t mca_mpool_base_tree_item_free_list = {{{0}}};
static opal_mutex_t tree_lock;

/*
 * simple minded compare function...
 */
int mca_mpool_base_tree_node_compare(void * key1, void * key2)
{
    if (key1 < key2) {
        return -1;
    } else if (key1 > key2) {
        return 1;
    } else {
        return 0;
    }
}
/*
 * initialize the rb tree
 */
int mca_mpool_base_tree_init(void) {
    int rc;
    OBJ_CONSTRUCT(&mca_mpool_base_tree, opal_rb_tree_t);
    OBJ_CONSTRUCT(&mca_mpool_base_tree_item_free_list, opal_free_list_t);
    OBJ_CONSTRUCT(&tree_lock, opal_mutex_t);
    rc = opal_free_list_init (&mca_mpool_base_tree_item_free_list,
                              sizeof(mca_mpool_base_tree_item_t),
                              opal_cache_line_size,
                              OBJ_CLASS(mca_mpool_base_tree_item_t),
                              0, opal_cache_line_size,
                              0, -1, 4, NULL, 0, NULL, NULL, NULL);
    if (OPAL_SUCCESS == rc) {
        rc = opal_rb_tree_init(&mca_mpool_base_tree, mca_mpool_base_tree_node_compare);
    }
    return rc;
}
/*
 * finalize the rb tree and release its resources
 */
int mca_mpool_base_tree_fini(void)
{
    OBJ_DESTRUCT(&mca_mpool_base_tree);
    OBJ_DESTRUCT(&mca_mpool_base_tree_item_free_list);
    OBJ_DESTRUCT(&tree_lock);
    return OPAL_SUCCESS;
}
/*
 * insert an item in the rb tree
 */
int mca_mpool_base_tree_insert(mca_mpool_base_tree_item_t* item) {
    int rc;
    OPAL_THREAD_LOCK(&tree_lock);
    rc = opal_rb_tree_insert(&mca_mpool_base_tree, item->key, item);
    OPAL_THREAD_UNLOCK(&tree_lock);
    return rc;
}
/*
 * remove an item from the rb tree
 * Does not put the item back onto the free list. That
 * must be done separately by calling mca_mpool_base_tree_item_put.
 * This allows a caller to remove an item from the tree
 * before safely cleaning up the item and only then returning it
 * to the free list. If the item is returned to the free list too
 * soon, race conditions can occur.
 */
int mca_mpool_base_tree_delete(mca_mpool_base_tree_item_t* item) {
    int rc;
    OPAL_THREAD_LOCK(&tree_lock);
    rc = opal_rb_tree_delete(&mca_mpool_base_tree, item->key);
    OPAL_THREAD_UNLOCK(&tree_lock);
    return rc;
}
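
/*
 * Illustrative sketch (hypothetical caller, not part of this file's API):
 * the ordering described above is delete first, clean up while the item is
 * no longer reachable through the tree, and only then recycle it.
 *
 *     mca_mpool_base_tree_item_t *item = mca_mpool_base_tree_find(base);
 *     if (NULL != item) {
 *         (void) mca_mpool_base_tree_delete(item);   // other threads can no longer find it
 *         // ... tear down whatever the item still references ...
 *         mca_mpool_base_tree_item_put(item);        // now safe to return to the free list
 *     }
 */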
/**
 * find the item in the rb tree
 */
mca_mpool_base_tree_item_t* mca_mpool_base_tree_find(void* base) {
    mca_mpool_base_tree_item_t* item;
    OPAL_THREAD_LOCK(&tree_lock);
    item = (mca_mpool_base_tree_item_t*)opal_rb_tree_find(&mca_mpool_base_tree,
                                                          base);
    OPAL_THREAD_UNLOCK(&tree_lock);
    return item;
}
/*
 * get a tree item from the free list
 */
mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void) {
    return (mca_mpool_base_tree_item_t *)
        opal_free_list_get (&mca_mpool_base_tree_item_free_list);
}

/*
 * put an item back into the free list
 */
void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item) {
    opal_free_list_return (&mca_mpool_base_tree_item_free_list,
                           &item->super);
}
/*
 * Print a show_help kind of message for any items still left in the
 * tree
 */
void mca_mpool_base_tree_print(int show_up_to_mem_leaks)
{
    /* If they asked to show 0 leaks, then don't show anything. */
    if (0 == show_up_to_mem_leaks) {
        return;
    }

    num_leaks = 0;
    max_mem_leaks = show_up_to_mem_leaks;
    opal_rb_tree_traverse(&mca_mpool_base_tree, condition, action);
    if (0 == num_leaks) {
        return;
    }

    if (num_leaks <= show_up_to_mem_leaks ||
        show_up_to_mem_leaks < 0) {
        opal_show_help("help-mpool-base.txt", "all mem leaks",
                       true, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                       opal_process_info.nodename,
                       getpid(), leak_msg);
    } else {
        int i = num_leaks - show_up_to_mem_leaks;
        opal_show_help("help-mpool-base.txt", "some mem leaks",
                       true, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                       opal_process_info.nodename,
                       getpid(), leak_msg, i,
                       (i > 1) ? "s were" : " was",
                       (i > 1) ? "are" : "is");
    }
    free(leak_msg);
    leak_msg = NULL;
}
/* Condition function for rb traversal */
static int condition(void *value)
{
    return 1;
}

/* Action function for rb traversal */
static void action(void *key, void *value)
{
    char *tmp;
    mca_mpool_base_tree_item_t *item = (mca_mpool_base_tree_item_t *) value;

    if ((++num_leaks <= max_mem_leaks) || (max_mem_leaks < 0)) {
        /* We know that we're supposed to make the first one; check on
           successive items if we're supposed to catenate more
           notices. */
        if (NULL == leak_msg) {
            opal_asprintf(&leak_msg, " %lu bytes at address 0x%lx",
                          (unsigned long) item->num_bytes,
                          (unsigned long) key);
        } else {
            opal_asprintf(&tmp, "%s\n %lu bytes at address 0x%lx",
                          leak_msg, (unsigned long) item->num_bytes,
                          (unsigned long) key);
            free(leak_msg);
            leak_msg = tmp;
        }
    }
}