2004-08-02 00:24:22 +00:00
|
|
|
/*
|
2007-03-16 23:11:45 +00:00
|
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
2005-11-05 19:57:48 +00:00
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2006-08-24 16:38:08 +00:00
|
|
|
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
2005-11-05 19:57:48 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 20:09:25 +00:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 12:43:37 +00:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2007-07-13 20:49:30 +00:00
|
|
|
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
2004-11-22 01:38:40 +00:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-08-02 00:24:22 +00:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2004-10-20 01:03:09 +00:00
|
|
|
#include "ompi_config.h"

#if HAVE_UNISTD_H
#include <unistd.h>
#endif  /* HAVE_UNISTD_H*/
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif  /* HAVE_STDLIB_H */
#include <errno.h>
#include <limits.h>
#include <stdio.h>

#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/allocator/base/base.h"
#include "mpool_sm.h"
#include "ompi/mca/common/sm/common_sm_mmap.h"
#include "orte/util/proc_info.h"
#include "orte/util/sys_info.h"
#include "ompi/proc/proc.h"
|
2004-06-16 15:41:29 +00:00
|
|
|
|
2004-08-02 00:24:22 +00:00
|
|
|
/*
|
|
|
|
* Local functions
|
|
|
|
*/
|
|
|
|
static int mca_mpool_sm_open(void);
|
2006-07-12 22:12:07 +00:00
|
|
|
static int mca_mpool_sm_close( void );
|
2005-06-21 17:10:28 +00:00
|
|
|
static mca_mpool_base_module_t* mca_mpool_sm_init(
|
2005-06-23 15:53:51 +00:00
|
|
|
struct mca_mpool_base_resources_t* resources);
|
2004-06-17 20:33:52 +00:00
|
|
|
|
2004-08-02 00:24:22 +00:00
|
|
|
mca_mpool_sm_component_t mca_mpool_sm_component = {
|
2004-06-16 15:41:29 +00:00
|
|
|
{
|
2004-08-02 00:24:22 +00:00
|
|
|
/* First, the mca_base_component_t struct containing meta
|
|
|
|
information about the component itself */
|
|
|
|
|
|
|
|
{
|
|
|
|
/* Indicate that we are a mpool v1.0.0 component (which also
|
|
|
|
implies a specific MCA version) */
|
|
|
|
|
|
|
|
MCA_MPOOL_BASE_VERSION_1_0_0,
|
|
|
|
|
|
|
|
"sm", /* MCA component name */
|
Major simplifications to component versioning:
- After long discussions and ruminations on how we run components in
LAM/MPI, made the decision that, by default, all components included
in Open MPI will use the version number of their parent project
(i.e., OMPI or ORTE). They are certaint free to use a different
number, but this simplification makes the common cases easy:
- components are only released when the parent project is released
- it is easy (trivial?) to distinguish which version component goes
with with version of the parent project
- removed all autogen/configure code for templating the version .h
file in components
- made all ORTE components use ORTE_*_VERSION for version numbers
- made all OMPI components use OMPI_*_VERSION for version numbers
- removed all VERSION files from components
- configure now displays OPAL, ORTE, and OMPI version numbers
- ditto for ompi_info
- right now, faking it -- OPAL and ORTE and OMPI will always have the
same version number (i.e., they all come from the same top-level
VERSION file). But this paves the way for the Great Configure
Reorganization, where, among other things, each project will have
its own version number.
So all in all, we went from a boatload of version numbers to
[effectively] three. That's pretty good. :-)
This commit was SVN r6344.
2005-07-04 20:12:36 +00:00
|
|
|
OMPI_MAJOR_VERSION, /* MCA component major version */
|
|
|
|
OMPI_MINOR_VERSION, /* MCA component minor version */
|
|
|
|
OMPI_RELEASE_VERSION, /* MCA component release version */
|
2004-08-02 00:24:22 +00:00
|
|
|
mca_mpool_sm_open, /* component open */
|
2006-07-12 22:12:07 +00:00
|
|
|
mca_mpool_sm_close
|
2004-08-02 00:24:22 +00:00
|
|
|
},
|
|
|
|
|
|
|
|
/* Next the MCA v1.0.0 component meta data */
|
|
|
|
|
|
|
|
{
|
2007-03-16 23:11:45 +00:00
|
|
|
/* The component is not checkpoint ready */
|
|
|
|
MCA_BASE_METADATA_PARAM_NONE
|
2004-08-02 00:24:22 +00:00
|
|
|
},
|
|
|
|
|
|
|
|
mca_mpool_sm_init
|
2004-06-16 15:41:29 +00:00
|
|
|
}
|
|
|
|
};
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2007-07-13 20:49:30 +00:00
|
|
|
/* String values of the max_size / min_size / per_peer_size MCA parameters.
   They are filled in when the parameters are registered in
   mca_mpool_sm_open() and converted to numbers in mca_mpool_sm_init(). */
static char *max_size_param;
static char *min_size_param;
static char *peer_size_param;

/* Built-in defaults for the three size parameters above; assigned in
   mca_mpool_sm_open(). */
static long default_max;
static long default_min;
static long default_peer;
|
2004-06-16 15:41:29 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* component open/close/init function
|
|
|
|
*/
|
2004-08-02 00:24:22 +00:00
|
|
|
static int mca_mpool_sm_open(void)
|
2004-06-16 15:41:29 +00:00
|
|
|
{
|
2007-07-13 20:49:30 +00:00
|
|
|
int value = 0;
|
2007-08-03 15:43:02 +00:00
|
|
|
char *size_str = NULL;
|
|
|
|
|
2007-07-13 20:49:30 +00:00
|
|
|
default_max = 512*1024*1024;
|
|
|
|
default_min = 128*1024*1024;
|
|
|
|
default_peer = 32*1024*1024;
|
|
|
|
|
2004-08-02 00:24:22 +00:00
|
|
|
/* register SM component parameters */
|
2006-09-26 16:02:31 +00:00
|
|
|
mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version,
|
|
|
|
"allocator",
|
|
|
|
"Name of allocator component to use with sm mpool",
|
|
|
|
false, false,
|
|
|
|
"bucket",
|
|
|
|
&mca_mpool_sm_component.sm_allocator_name);
|
|
|
|
|
2007-08-07 14:40:13 +00:00
|
|
|
/* register values as string instead of int. A string-converted
|
|
|
|
* signed long int allows the max_size or the sm_size
|
|
|
|
* to be set up to 2GB-1 for 32 bit and much greater for 64 bit. */
|
2007-08-03 15:43:02 +00:00
|
|
|
asprintf(&size_str, "%ld", default_max);
|
2007-07-13 20:49:30 +00:00
|
|
|
mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version,
|
2006-09-26 16:02:31 +00:00
|
|
|
"max_size",
|
|
|
|
"Maximum size of the sm mpool shared memory file",
|
2007-07-13 20:49:30 +00:00
|
|
|
false, false, size_str, &max_size_param);
|
2007-08-03 15:43:02 +00:00
|
|
|
free(size_str);
|
|
|
|
asprintf(&size_str, "%ld", default_min);
|
2007-07-13 20:49:30 +00:00
|
|
|
mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version,
|
2006-09-26 16:02:31 +00:00
|
|
|
"min_size",
|
|
|
|
"Minimum size of the sm mpool shared memory file",
|
2007-07-13 20:49:30 +00:00
|
|
|
false, false, size_str, &min_size_param);
|
2007-08-03 15:43:02 +00:00
|
|
|
free(size_str);
|
|
|
|
asprintf(&size_str, "%ld", default_peer);
|
2007-07-13 20:49:30 +00:00
|
|
|
mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version,
|
2006-09-26 16:02:31 +00:00
|
|
|
"per_peer_size",
|
|
|
|
"Size (in bytes) to allocate per local peer in "
|
|
|
|
"the sm mpool shared memory file, bounded by "
|
|
|
|
"min_size and max_size",
|
2007-07-13 20:49:30 +00:00
|
|
|
false, false, size_str, &peer_size_param);
|
2007-08-03 15:43:02 +00:00
|
|
|
free(size_str);
|
2007-07-13 20:49:30 +00:00
|
|
|
mca_base_param_reg_int(&mca_mpool_sm_component.super.mpool_version,
|
|
|
|
"verbose",
|
|
|
|
"Enable verbose output for mpool sm component",
|
|
|
|
false, false, 0, &value);
|
|
|
|
if (value != 0) {
|
|
|
|
mca_mpool_sm_component.verbose = opal_output_open(NULL);
|
|
|
|
} else {
|
|
|
|
mca_mpool_sm_component.verbose = -1;
|
|
|
|
}
|
2006-09-26 16:02:31 +00:00
|
|
|
mca_mpool_sm_component.sm_size = 0;
|
|
|
|
|
2004-06-16 15:41:29 +00:00
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2006-07-12 22:12:07 +00:00
|
|
|
static int mca_mpool_sm_close( void )
|
|
|
|
{
|
|
|
|
if( NULL != mca_common_sm_mmap ) {
|
|
|
|
if( OMPI_SUCCESS == mca_common_sm_mmap_fini( mca_common_sm_mmap ) ) {
|
|
|
|
unlink( mca_common_sm_mmap->map_path );
|
|
|
|
}
|
2006-07-18 18:42:30 +00:00
|
|
|
OBJ_RELEASE( mca_common_sm_mmap );
|
2006-07-12 22:12:07 +00:00
|
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
2004-06-16 15:41:29 +00:00
|
|
|
|
2005-06-21 17:10:28 +00:00
|
|
|
static mca_mpool_base_module_t* mca_mpool_sm_init(
|
2005-06-23 15:53:51 +00:00
|
|
|
struct mca_mpool_base_resources_t* resources)
|
2004-06-16 15:41:29 +00:00
|
|
|
{
|
2004-10-21 22:40:25 +00:00
|
|
|
char *file_name;
|
2005-04-26 05:27:38 +00:00
|
|
|
int len;
|
2005-05-31 17:06:55 +00:00
|
|
|
mca_mpool_sm_module_t* mpool_module;
|
2007-07-13 20:49:30 +00:00
|
|
|
mca_allocator_base_component_t* allocator_component;
|
2007-08-03 15:43:02 +00:00
|
|
|
long max_size, min_size, peer_size;
|
2006-09-26 16:02:31 +00:00
|
|
|
ompi_proc_t **procs;
|
|
|
|
size_t num_all_procs, i, num_local_procs = 0;
|
2007-07-13 20:49:30 +00:00
|
|
|
|
2007-02-01 17:18:35 +00:00
|
|
|
/* README: this needs to change if procs in different jobs (even
|
|
|
|
spawned ones) are to talk using shared memory */
|
|
|
|
procs = ompi_proc_world(&num_all_procs);
|
2006-09-26 16:02:31 +00:00
|
|
|
for (i = 0 ; i < num_all_procs ; ++i) {
|
|
|
|
if (procs[i]->proc_flags & OMPI_PROC_FLAG_LOCAL) {
|
|
|
|
num_local_procs++;
|
|
|
|
}
|
|
|
|
}
|
2007-07-30 14:44:37 +00:00
|
|
|
free(procs);
|
|
|
|
|
2007-07-13 20:49:30 +00:00
|
|
|
/* parse the max, min and peer sizes, and validate them */
|
|
|
|
/* absolutely necessary to reset errno each time */
|
|
|
|
errno = 0;
|
2007-08-03 15:43:02 +00:00
|
|
|
max_size = strtol(max_size_param, (char **)NULL, 10);
|
2007-07-13 20:49:30 +00:00
|
|
|
if (errno == ERANGE) {
|
2007-08-03 15:43:02 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: max_size overflows! set to default (%ld)", default_max);
|
2007-07-13 20:49:30 +00:00
|
|
|
max_size = default_max;
|
|
|
|
} else if (errno == EINVAL) {
|
2007-08-03 15:43:02 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: invalid max_size entered. set it to (%ld)", default_max);
|
2007-07-13 20:49:30 +00:00
|
|
|
max_size = default_max;
|
|
|
|
}
|
|
|
|
|
|
|
|
errno = 0;
|
2007-08-03 15:43:02 +00:00
|
|
|
min_size = strtol(min_size_param, (char **)NULL, 10);
|
2007-07-13 20:49:30 +00:00
|
|
|
if (errno == ERANGE) {
|
2007-08-03 15:43:02 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: min_size overflows! set to default (%ld)", default_min);
|
2007-07-13 20:49:30 +00:00
|
|
|
min_size = default_min;
|
|
|
|
} else if (errno == EINVAL) {
|
2007-08-03 15:43:02 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: invalid min_size entered. set it to (%ld)", default_min);
|
2007-07-13 20:49:30 +00:00
|
|
|
min_size = default_min;
|
|
|
|
}
|
|
|
|
|
|
|
|
errno = 0;
|
2007-08-03 15:43:02 +00:00
|
|
|
peer_size = strtol(peer_size_param, (char **)NULL, 10);
|
2007-07-13 20:49:30 +00:00
|
|
|
if (errno == ERANGE) {
|
2007-08-03 15:43:02 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: peer_size overflows! set to default (%ld)", default_peer);
|
2007-07-13 20:49:30 +00:00
|
|
|
peer_size = default_peer;
|
|
|
|
} else if (errno == EINVAL) {
|
2007-08-03 15:43:02 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: invalid peer_size entered. set it to (%ld)", default_peer);
|
2007-07-13 20:49:30 +00:00
|
|
|
peer_size = default_peer;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* more checks... */
|
2006-09-26 16:02:31 +00:00
|
|
|
if (min_size > max_size) {
|
2007-08-03 15:43:02 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: adjusting max_size to be min_size (%ld)",
|
2006-09-26 16:02:31 +00:00
|
|
|
min_size);
|
|
|
|
max_size = min_size;
|
|
|
|
}
|
|
|
|
|
2007-07-13 20:49:30 +00:00
|
|
|
/* sm_size is a product of peer_size * num_local_procs. To prevent the
|
|
|
|
* sm_size from overflowing SIZE_MAX, we first calculate the quotient.
|
|
|
|
* If quotient is less than the peer_size, it means the product
|
|
|
|
* (peer_size * num_local_procs) is going to overflow SIZE_MAX, then we'll
|
|
|
|
* set sm_size to max_size. */
|
2007-08-03 15:43:02 +00:00
|
|
|
if ((double)LONG_MAX / num_local_procs < peer_size) {
|
2007-07-13 20:49:30 +00:00
|
|
|
/* enable verbose would show if sm_size overflows */
|
|
|
|
opal_output(mca_mpool_sm_component.verbose,
|
2007-08-03 15:43:02 +00:00
|
|
|
"mca_mpool_sm_init: sm_size overflows, set sm_size to max_size (%ld)",
|
|
|
|
LONG_MAX);
|
2007-07-13 20:49:30 +00:00
|
|
|
mca_mpool_sm_component.sm_size = max_size;
|
|
|
|
} else {
|
|
|
|
mca_mpool_sm_component.sm_size = peer_size * num_local_procs;
|
|
|
|
}
|
|
|
|
|
2007-08-03 15:43:02 +00:00
|
|
|
if (min_size > mca_mpool_sm_component.sm_size) {
|
2006-09-26 16:02:31 +00:00
|
|
|
mca_mpool_sm_component.sm_size = min_size;
|
|
|
|
}
|
2007-08-03 15:43:02 +00:00
|
|
|
if (max_size < mca_mpool_sm_component.sm_size) {
|
2006-09-26 16:02:31 +00:00
|
|
|
mca_mpool_sm_component.sm_size = max_size;
|
|
|
|
}
|
|
|
|
|
2005-06-07 02:09:57 +00:00
|
|
|
allocator_component = mca_allocator_component_lookup(
|
2004-08-02 00:24:22 +00:00
|
|
|
mca_mpool_sm_component.sm_allocator_name);
|
2004-06-16 20:01:19 +00:00
|
|
|
|
2007-07-13 20:49:30 +00:00
|
|
|
/* if specified allocator cannot be loaded - look for an alternative */
|
2004-06-16 20:01:19 +00:00
|
|
|
if(NULL == allocator_component) {
|
2005-07-03 16:22:16 +00:00
|
|
|
if(opal_list_get_size(&mca_allocator_base_components) == 0) {
|
2004-08-02 00:24:22 +00:00
|
|
|
mca_base_component_list_item_t* item = (mca_base_component_list_item_t*)
|
2005-07-03 16:22:16 +00:00
|
|
|
opal_list_get_first(&mca_allocator_base_components);
|
2004-08-02 00:24:22 +00:00
|
|
|
allocator_component = (mca_allocator_base_component_t*)item->cli_component;
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: unable to locate allocator: %s - using %s\n",
|
2004-08-02 00:24:22 +00:00
|
|
|
mca_mpool_sm_component.sm_allocator_name, allocator_component->allocator_version.mca_component_name);
|
2004-06-16 20:01:19 +00:00
|
|
|
} else {
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: unable to locate allocator: %s\n",
|
2004-08-02 00:24:22 +00:00
|
|
|
mca_mpool_sm_component.sm_allocator_name);
|
2004-06-16 20:01:19 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
2007-07-13 20:49:30 +00:00
|
|
|
|
2005-05-31 17:06:55 +00:00
|
|
|
mpool_module = (mca_mpool_sm_module_t*)malloc(sizeof(mca_mpool_sm_module_t));
|
|
|
|
mca_mpool_sm_module_init(mpool_module);
|
2006-07-12 22:12:07 +00:00
|
|
|
|
2004-06-16 20:01:19 +00:00
|
|
|
/* create initial shared memory mapping */
|
2006-08-24 16:38:08 +00:00
|
|
|
len = asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
|
2006-01-19 07:09:44 +00:00
|
|
|
orte_process_info.job_session_dir,
|
|
|
|
orte_system_info.nodename );
|
2004-10-21 22:40:25 +00:00
|
|
|
if ( 0 > len ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2007-07-13 20:49:30 +00:00
|
|
|
|
|
|
|
opal_output(mca_mpool_sm_component.verbose,
|
2007-08-03 15:43:02 +00:00
|
|
|
"mca_mpool_sm_init: shared memory size used: (%ld)",
|
2007-07-13 20:49:30 +00:00
|
|
|
mca_mpool_sm_component.sm_size);
|
2004-10-21 22:40:25 +00:00
|
|
|
|
2004-08-04 17:22:16 +00:00
|
|
|
if(NULL ==
|
2004-08-06 19:35:57 +00:00
|
|
|
(mca_common_sm_mmap =
|
|
|
|
mca_common_sm_mmap_init(mca_mpool_sm_component.sm_size,
|
2004-10-21 22:40:25 +00:00
|
|
|
file_name,sizeof(mca_common_sm_mmap_t), 8 )
|
2004-08-04 17:22:16 +00:00
|
|
|
))
|
|
|
|
{
|
2006-01-19 07:09:44 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: unable to create shared memory mapping (%s)", file_name);
|
2004-10-21 22:40:25 +00:00
|
|
|
free(file_name);
|
2004-06-16 20:01:19 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
2004-10-21 22:40:25 +00:00
|
|
|
free(file_name);
|
2004-06-16 20:01:19 +00:00
|
|
|
|
2006-07-12 22:12:07 +00:00
|
|
|
/* setup allocator */
|
2005-05-31 17:06:55 +00:00
|
|
|
mpool_module->sm_allocator =
|
|
|
|
allocator_component->allocator_init(true,
|
2005-06-21 17:10:28 +00:00
|
|
|
mca_common_sm_mmap_seg_alloc, NULL, NULL);
|
2005-05-31 17:06:55 +00:00
|
|
|
if(NULL == mpool_module->sm_allocator) {
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output(0, "mca_mpool_sm_init: unable to initialize allocator");
|
2004-06-16 20:01:19 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
2005-05-31 17:06:55 +00:00
|
|
|
|
|
|
|
return &mpool_module->super;
|
2004-06-16 15:41:29 +00:00
|
|
|
}
|
2005-05-31 17:06:55 +00:00
|
|
|
|