
We currently save the hostname of a proc when we create the ompi_proc_t for it. This was originally done because the only method we had for discovering the host of a proc was to include that info in the modex, and we had to therefore store it somewhere proc-local. Obviously, this carried a memory penalty for storing all those strings, and so we added a "cutoff" parameter so that we wouldn't collect hostnames above a certain number of procs. Unfortunately, this still results in an 8-byte/proc memory cost as we have a char* pointer in the opal_proc_t that is contained in the ompi_proc_t so that we can store the hostname of the other procs if we fall below the cutoff. At scale, this can consume a fair amount of memory. With the switch to relying on PMIx, there is no longer a need to cache the proc hostnames. Using the "optional" feature of PMIx_Get, we restrict the retrieval to be purely proc-local - i.e., we retrieve the info either via shared memory or from within the proc-internal hash storage (depending upon the active PMIx components). Thus, the retrieval of a hostname is purely a local operation involving no communication. All RM's are required to provide a complete hostname map of all procs at startup. Thus, we have full access to all hostnames without including them in a modex or having to cache them on each proc. This allows us to remove the char* pointer from the opal_proc_t, saving us 8-bytes/proc. Unfortunately, PMIx_Get does not currently support the return of a static pointer to memory. Thus, even though PMIx has the hostname in its memory, it can only return a malloc'd version of it. I have therefore ensured that the return from opal_get_proc_hostname is consistently malloc'd and free'd wherever used. This shouldn't be a burden as the hostname is only used in one of two circumstances: (a) in an error message, or (b) in a verbose output for debugging purposes. Thus, there should be no performance penalty associated with the malloc/free requirement. 
PMIx will eventually be returning static pointers, and so we can eventually simplify this method and return a "const char*" - but as noted, this really isn't an issue even today. Signed-off-by: Ralph Castain <rhc@pmix.org>
243 строки
7.0 KiB
C
243 строки
7.0 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
|
* Copyright (c) 2007 Voltaire. All rights reserved.
|
|
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
|
* Copyright (c) 2012-2015 Los Alamos National Security, LLC.
|
|
* All rights reserved.
|
|
* Copyright (c) 2015-2018 Research Organization for Information Science
|
|
* and Technology (RIST). All rights reserved.
|
|
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
|
* Copyright (c) 2020 Intel, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
#include "opal_config.h"

#include <stdint.h>

#include "opal/mca/mca.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "opal/util/printf.h"

#include "opal/class/opal_rb_tree.h"
#include "mpool_base_tree.h"
|
|
|
|
|
|
/* Number of tree items visited by action() during the current report;
   reset to 0 by mca_mpool_base_tree_print() before each traversal. */
static int num_leaks = 0;
|
|
/* Maximum number of leaks that action() lists individually; a negative
   value removes the limit. Set by mca_mpool_base_tree_print(). */
static int max_mem_leaks = -1;
|
|
/* Heap-allocated text listing the leaks. Built up by action() and
   freed (and reset to NULL) by mca_mpool_base_tree_print(). */
static char *leak_msg = NULL;
|
|
|
|
/* Callbacks handed to opal_rb_tree_traverse() by
   mca_mpool_base_tree_print(); both are defined at the end of the file. */
static int condition(void *value);
|
|
static void action(void *key, void *value);
|
|
|
|
/*
 * Constructor registered for mca_mpool_base_tree_item_t: a newly
 * constructed item starts out with no key assigned.
 */
static void opal_mca_mpool_base_tree_constructor(mca_mpool_base_tree_item_t *item)
{
    item->key = NULL;
}
|
|
|
|
/* Define the class for tree items: parent class opal_free_list_item_t,
   constructor opal_mca_mpool_base_tree_constructor. NOTE(review): the
   trailing NULL is presumably the destructor slot - confirm against the
   OBJ_CLASS_INSTANCE definition. */
OBJ_CLASS_INSTANCE(mca_mpool_base_tree_item_t, opal_free_list_item_t, opal_mca_mpool_base_tree_constructor, NULL);
|
|
|
|
/*
|
|
* use globals for the tree and the tree_item free list..
|
|
*/
|
|
/* The tree itself; constructed and initialized in
   mca_mpool_base_tree_init(), destructed in mca_mpool_base_tree_fini(). */
opal_rb_tree_t mca_mpool_base_tree = {{0}};
|
|
/* Free list that mca_mpool_base_tree_item_get() draws items from and
   mca_mpool_base_tree_item_put() returns them to. */
opal_free_list_t mca_mpool_base_tree_item_free_list = {{{0}}};
|
|
/* Lock taken around the insert, delete and find operations on the tree. */
static opal_mutex_t tree_lock;
|
|
|
|
/*
 * Order two tree keys by their address value.
 *
 * The keys are opaque pointers that need not point into the same
 * object.  Applying < or > directly to such pointers is undefined
 * behavior in C, so both keys are converted to uintptr_t and compared
 * as integers instead.
 *
 * Returns -1 if key1 sorts before key2, 1 if key1 sorts after key2,
 * and 0 if both keys hold the same address.
 */
int mca_mpool_base_tree_node_compare(void * key1, void * key2)
{
    const uintptr_t lhs = (uintptr_t) key1;
    const uintptr_t rhs = (uintptr_t) key2;

    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}
|
|
|
|
/*
|
|
* initialize the rb tree
|
|
*/
|
|
int mca_mpool_base_tree_init(void) {
|
|
int rc;
|
|
OBJ_CONSTRUCT(&mca_mpool_base_tree, opal_rb_tree_t);
|
|
OBJ_CONSTRUCT(&mca_mpool_base_tree_item_free_list, opal_free_list_t);
|
|
OBJ_CONSTRUCT(&tree_lock, opal_mutex_t);
|
|
rc = opal_free_list_init (&mca_mpool_base_tree_item_free_list,
|
|
sizeof(mca_mpool_base_tree_item_t),
|
|
opal_cache_line_size,
|
|
OBJ_CLASS(mca_mpool_base_tree_item_t),
|
|
0,opal_cache_line_size,
|
|
0, -1 , 4, NULL, 0, NULL, NULL, NULL);
|
|
if(OPAL_SUCCESS == rc) {
|
|
rc = opal_rb_tree_init(&mca_mpool_base_tree, mca_mpool_base_tree_node_compare);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*/
|
|
int mca_mpool_base_tree_fini(void)
|
|
{
|
|
OBJ_DESTRUCT(&mca_mpool_base_tree);
|
|
OBJ_DESTRUCT(&mca_mpool_base_tree_item_free_list);
|
|
OBJ_DESTRUCT(&tree_lock);
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
* insert an item in the rb tree
|
|
*/
|
|
int mca_mpool_base_tree_insert(mca_mpool_base_tree_item_t* item) {
|
|
int rc;
|
|
|
|
OPAL_THREAD_LOCK(&tree_lock);
|
|
rc = opal_rb_tree_insert(&mca_mpool_base_tree, item->key, item);
|
|
OPAL_THREAD_UNLOCK(&tree_lock);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
* remove an item from the rb tree
|
|
* Does not put the item back onto the free list. That
|
|
* must be done separately by calling mca_mpool_base_tree_item_put.
|
|
* This allows a caller to remove an item from the tree
|
|
* before safely cleaning up the item and only then returning it
|
|
* to the free list. If the item is returned to the free list too soon
|
|
* race conditions can occur
|
|
*
|
|
*/
|
|
int mca_mpool_base_tree_delete(mca_mpool_base_tree_item_t* item) {
|
|
int rc;
|
|
|
|
OPAL_THREAD_LOCK(&tree_lock);
|
|
rc = opal_rb_tree_delete(&mca_mpool_base_tree, item->key);
|
|
OPAL_THREAD_UNLOCK(&tree_lock);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* find the item in the rb tree
|
|
*/
|
|
mca_mpool_base_tree_item_t* mca_mpool_base_tree_find(void* base) {
|
|
mca_mpool_base_tree_item_t* item;
|
|
|
|
OPAL_THREAD_LOCK(&tree_lock);
|
|
item = (mca_mpool_base_tree_item_t*)opal_rb_tree_find(&mca_mpool_base_tree,
|
|
base);
|
|
OPAL_THREAD_UNLOCK(&tree_lock);
|
|
|
|
return item;
|
|
}
|
|
|
|
/*
|
|
* get a tree item from the free list
|
|
*/
|
|
mca_mpool_base_tree_item_t* mca_mpool_base_tree_item_get(void) {
|
|
return (mca_mpool_base_tree_item_t *)
|
|
opal_free_list_get (&mca_mpool_base_tree_item_free_list);
|
|
}
|
|
|
|
/*
|
|
* put an item back into the free list
|
|
*/
|
|
void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item) {
|
|
opal_free_list_return (&mca_mpool_base_tree_item_free_list,
|
|
&item->super);
|
|
}
|
|
|
|
|
|
/*
|
|
* Print a show_help kind of message for an items still left in the
|
|
* tree
|
|
*/
|
|
void mca_mpool_base_tree_print(int show_up_to_mem_leaks)
|
|
{
|
|
/* If they asked to show 0 leaks, then don't show anything. */
|
|
if (0 == show_up_to_mem_leaks) {
|
|
return;
|
|
}
|
|
|
|
num_leaks = 0;
|
|
max_mem_leaks = show_up_to_mem_leaks;
|
|
opal_rb_tree_traverse(&mca_mpool_base_tree, condition, action);
|
|
if (0 == num_leaks) {
|
|
return;
|
|
}
|
|
|
|
if (num_leaks <= show_up_to_mem_leaks ||
|
|
show_up_to_mem_leaks < 0) {
|
|
opal_show_help("help-mpool-base.txt", "all mem leaks",
|
|
true, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
|
|
opal_process_info.nodename,
|
|
getpid(), leak_msg);
|
|
} else {
|
|
int i = num_leaks - show_up_to_mem_leaks;
|
|
opal_show_help("help-mpool-base.txt", "some mem leaks",
|
|
true, OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
|
|
opal_process_info.nodename,
|
|
getpid(), leak_msg, i,
|
|
(i > 1) ? "s were" : " was",
|
|
(i > 1) ? "are" : "is");
|
|
}
|
|
free(leak_msg);
|
|
leak_msg = NULL;
|
|
}
|
|
|
|
|
|
/* Condition function for rb traversal: every item is accepted, the
   value itself is never inspected. */
static int condition(void *value)
{
    (void) value;

    return 1;
}
|
|
|
|
|
|
/* Action function for rb traversal */
|
|
static void action(void *key, void *value)
|
|
{
|
|
char *tmp;
|
|
mca_mpool_base_tree_item_t *item = (mca_mpool_base_tree_item_t *) value;
|
|
|
|
if( (++num_leaks <= max_mem_leaks) || (max_mem_leaks < 0) ) {
|
|
|
|
/* We know that we're supposed to make the first one; check on
|
|
successive items if we're supposed to catenate more
|
|
notices. */
|
|
if (NULL == leak_msg) {
|
|
opal_asprintf(&leak_msg, " %lu bytes at address 0x%lx",
|
|
(unsigned long) item->num_bytes,
|
|
(unsigned long) key);
|
|
} else {
|
|
opal_asprintf(&tmp, "%s\n %lu bytes at address 0x%lx",
|
|
leak_msg, (unsigned long) item->num_bytes,
|
|
(unsigned long) key);
|
|
free(leak_msg);
|
|
leak_msg = tmp;
|
|
}
|
|
}
|
|
}
|