New POSIX shared memory component and other common sm enhancements.
NOTE: mmap is still the default. Some highlights: o Silent component failover. o The sysv component will only be queried for selection if it is placed before the mmap component (for example, -mca mpi_common_sm sysv,posix,mmap). In the default case, sysv will never be queried/selected. o Per some on-list discussion, now unlinking mmaped file in both mmap and posix components (see: "System V Shared Memory for Open MPI: Request for Community Input and Testing" thread). o Assuming local process homogeneity with respect to all utilized shared memory facilities. That is, if one local process deems a particular shared memory facility acceptable, then ALL local processes should be able to utilize that facility. As it stands, this is an important point because one process dictates to all other local processes which common sm component will be selected based on its own, local run-time test. o Addressed some of George's code reuse concerns. This commit was SVN r23633.
Этот коммит содержится в:
родитель
7a301bc417
Коммит
3b162593e6
10
README
10
README
@ -941,9 +941,13 @@ for a full list); a summary of the more commonly used ones follows:
|
||||
--disable-<name> form; this form may be slightly more compact if
|
||||
disabling multiple packages.
|
||||
|
||||
--enable-sysv
|
||||
Enable System V (sysv) shared memory support. By default, System V
|
||||
shared memory support is disabled.
|
||||
--disable-sysv
|
||||
Disable System V (sysv) shared memory support. By default, System V
|
||||
shared memory support is enabled.
|
||||
|
||||
--disable-posix-shmem
|
||||
Disable POSIX shared memory support. By default, POSIX shared memory support
|
||||
is enabled.
|
||||
|
||||
--with-wrapper-cflags=<cflags>
|
||||
--with-wrapper-cxxflags=<cxxflags>
|
||||
|
@ -10,6 +10,8 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -45,18 +47,20 @@ EXTRA_DIST = .windows
|
||||
|
||||
headers = \
|
||||
common_sm.h \
|
||||
common_sm_rml.h \
|
||||
common_sm_mmap.h
|
||||
|
||||
# Source files
|
||||
|
||||
sources = \
|
||||
common_sm.c \
|
||||
common_sm_rml.c \
|
||||
common_sm_mmap.c
|
||||
|
||||
# Only build the Windows support if we're building on windows, but
|
||||
# always include the files in the tarball.
|
||||
|
||||
if MCA_common_sm_windows
|
||||
if COMMON_SM_BUILD_WINDOWS
|
||||
headers += common_sm_windows.h
|
||||
sources += common_sm_windows.c
|
||||
endif
|
||||
@ -64,11 +68,19 @@ endif
|
||||
# Only build the SYSV support if we have the right stuff, but
|
||||
# always include the files in the tarball.
|
||||
|
||||
if MCA_common_sm_sysv
|
||||
if COMMON_SM_BUILD_SYSV
|
||||
headers += common_sm_sysv.h
|
||||
sources += common_sm_sysv.c
|
||||
endif
|
||||
|
||||
# Only build the POSIX support if we have the right stuff, but
|
||||
# always include the files in the tarball.
|
||||
|
||||
if COMMON_SM_BUILD_POSIX
|
||||
headers += common_sm_posix.h
|
||||
sources += common_sm_posix.c
|
||||
endif
|
||||
|
||||
# Help file
|
||||
|
||||
dist_pkgdata_DATA = help-mpi-common-sm.txt
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
* Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
@ -14,9 +14,9 @@
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
@ -26,10 +26,17 @@
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
|
||||
#include "common_sm_rml.h"
|
||||
#include "common_sm_mmap.h"
|
||||
#if MCA_COMMON_SM_SYSV
|
||||
#include "common_sm_sysv.h"
|
||||
@ -37,81 +44,297 @@
|
||||
#if MCA_COMMON_SM_WINDOWS
|
||||
#include "common_sm_windows.h"
|
||||
#endif /* MCA_COMMON_SM_WINDOWS */
|
||||
#if MCA_COMMON_SM_POSIX
|
||||
#include "common_sm_posix.h"
|
||||
#endif /* MCA_COMMON_SM_POSIX */
|
||||
|
||||
static int initialized = 0;
|
||||
static int sysv_index = -1;
|
||||
static char **sm_argv = NULL;
|
||||
/* let mmap be the default selection */
|
||||
static char *sm_params = "mmap";
|
||||
static mca_common_sm_init_fn_t sm_init = NULL;
|
||||
static mca_common_sm_init_group_fn_t sm_init_group = NULL;
|
||||
static mca_common_sm_seg_alloc_fn_t sm_seg_alloc = NULL;
|
||||
static mca_common_sm_fini_fn_t sm_fini = NULL;
|
||||
static char sm_all_buff[OPAL_PATH_MAX];
|
||||
/**
|
||||
* ASSUMING local proc homogeneity with respect to all utilized shared memory
|
||||
* facilities. that is, if one local proc deems a particular shared memory
|
||||
* facility acceptable, then ALL local procs should be able to utilize that
|
||||
* facility. as it stands, this is an important point because one process
|
||||
* dictates to all other local procs which common sm component will be selected
|
||||
* based on its own, local run-time test.
|
||||
*/
|
||||
|
||||
mca_common_sm_module_t *mca_common_sm_module = NULL;
|
||||
static bool initialized = false;
|
||||
static int sysv_index = -1;
|
||||
static int posix_index = -1;
|
||||
static int common_sm_index = -1;
|
||||
static char **sm_argv = NULL;
|
||||
static char *sm_params = NULL;
|
||||
static mca_common_sm_init_fn_t sm_init = NULL;
|
||||
static mca_common_sm_seg_alloc_fn_t sm_seg_alloc = NULL;
|
||||
static mca_common_sm_fini_fn_t sm_fini = NULL;
|
||||
/* should be more than enough to store all common sm component names */
|
||||
static char sm_default[32];
|
||||
|
||||
/**
|
||||
* lock to protect multiple instances of query_sm_components()
|
||||
* from being invoked simultaneously (because of rml usage).
|
||||
*/
|
||||
static opal_mutex_t mutex;
|
||||
|
||||
/* common shared memory component information */
|
||||
typedef struct
|
||||
{
|
||||
/* flag indicating whether or not the component is available */
|
||||
bool avail;
|
||||
/* component name */
|
||||
char *sm_name;
|
||||
} mca_common_sm_info_t;
|
||||
/**
|
||||
* NOTE:
|
||||
* o array position dictates the default order in which
|
||||
* the common shared memory components will be queried.
|
||||
* o first component successfully queried gets selected.
|
||||
* o sm_name format: {component availability, "component name,"}
|
||||
*
|
||||
* if you change the order of sm_avail_table below,
|
||||
* don't forget to update mca_common_sm_comp_index_map_t.
|
||||
*
|
||||
* placing mmap before sysv in the list prevents sysv from ever being selected
|
||||
* (in the default case). this is because, at least for now, mmap's selection
|
||||
* query always succeeds. that is, sysv must be explicitly requested.
|
||||
* NOTE: mmap is the default for now.
|
||||
*
|
||||
* {component availability, component name}
|
||||
*/
|
||||
static const mca_common_sm_info_t sm_avail_table[] =
|
||||
{
|
||||
{true , "mmap," }, /* assume mmap is always available */
|
||||
{(bool)MCA_COMMON_SM_POSIX, "posix,"},
|
||||
{(bool)MCA_COMMON_SM_SYSV , "sysv," },
|
||||
{false , NULL } /* MUST BE LAST ITEM */
|
||||
};
|
||||
/* component index enum */
|
||||
typedef enum
|
||||
{
|
||||
MCA_COMMON_SM_COMP_INDEX_MMAP = 0,
|
||||
MCA_COMMON_SM_COMP_INDEX_POSIX,
|
||||
MCA_COMMON_SM_COMP_INDEX_SYSV,
|
||||
MCA_COMMON_SM_COMP_INDEX_NONE /* MUST BE LAST ITEM */
|
||||
} mca_common_sm_comp_index_map_t;
|
||||
|
||||
/**
|
||||
* list of RML messages that have arrived that have not yet been
|
||||
* consumed by the thread who is looking to complete its component
|
||||
* initialization based on the contents of the RML message.
|
||||
*/
|
||||
static opal_list_t pending_rml_msgs;
|
||||
|
||||
/******************************************************************************/
|
||||
/* STATIC UTILITY FUNCTIONS */
|
||||
/******************************************************************************/
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* this routine selects the common sm component that corresponds to
|
||||
* sm_component_index's value.
|
||||
*
|
||||
* @param sm_component_index index corresponding to the common sm component that
|
||||
* is to be selected. (IN)
|
||||
*/
|
||||
static void
|
||||
select_common_sm_component(int sm_component_index)
|
||||
{
|
||||
switch (sm_component_index)
|
||||
{
|
||||
case MCA_COMMON_SM_COMP_INDEX_POSIX:
|
||||
sm_init = mca_common_sm_posix_init;
|
||||
sm_seg_alloc = mca_common_sm_posix_seg_alloc;
|
||||
sm_fini = mca_common_sm_posix_fini;
|
||||
break;
|
||||
case MCA_COMMON_SM_COMP_INDEX_MMAP:
|
||||
#if !MCA_COMMON_SM_WINDOWS
|
||||
sm_init = mca_common_sm_mmap_init;
|
||||
sm_seg_alloc = mca_common_sm_mmap_seg_alloc;
|
||||
sm_fini = mca_common_sm_mmap_fini;
|
||||
#else /* MCA_COMMON_SM_WINDOWS */
|
||||
sm_init = mca_common_sm_windows_init;
|
||||
sm_seg_alloc = mca_common_sm_windows_seg_alloc;
|
||||
sm_fini = mca_common_sm_windows_fini;
|
||||
#endif
|
||||
break;
|
||||
case MCA_COMMON_SM_COMP_INDEX_SYSV:
|
||||
sm_init = mca_common_sm_sysv_init;
|
||||
sm_seg_alloc = mca_common_sm_sysv_seg_alloc;
|
||||
sm_fini = mca_common_sm_sysv_fini;
|
||||
break;
|
||||
case MCA_COMMON_SM_COMP_INDEX_NONE:
|
||||
sm_init = NULL;
|
||||
sm_seg_alloc = NULL;
|
||||
sm_fini = NULL;
|
||||
break;
|
||||
default:
|
||||
sm_init = NULL;
|
||||
sm_seg_alloc = NULL;
|
||||
sm_fini = NULL;
|
||||
opal_output(0, "WARNING: invalid common sm component index.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* this routine performs a series of run-time tests that determines whether or
|
||||
* not a particular common sm component can be selected safely. once a component
|
||||
* is successfully selected, its component index is returned.
|
||||
*
|
||||
* @return index corresponding to the selected common sm component. see
|
||||
* mca_common_sm_comp_index_map_t for valid values.
|
||||
*/
|
||||
static int
|
||||
query_sm_components(void)
|
||||
{
|
||||
int help_msg_displayed = 0;
|
||||
int sm_component_index = MCA_COMMON_SM_COMP_INDEX_NONE;
|
||||
int i;
|
||||
|
||||
if (NULL != sm_argv)
|
||||
{
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("looking for available components");
|
||||
for (i = 0; NULL != sm_argv[i]; ++i)
|
||||
{
|
||||
if (0 == strcasecmp(sm_argv[i], "posix"))
|
||||
{
|
||||
#if !MCA_COMMON_SM_POSIX
|
||||
if (!help_msg_displayed)
|
||||
{
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sm support",
|
||||
1,
|
||||
sm_argv[i]);
|
||||
help_msg_displayed = 1;
|
||||
}
|
||||
#else /* MCA_COMMON_SM_POSIX */
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("querying posix");
|
||||
/**
|
||||
* make sure that we can safely use posix sm on this system
|
||||
*/
|
||||
if (OMPI_SUCCESS ==
|
||||
mca_common_sm_posix_component_query())
|
||||
{
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("selecting posix");
|
||||
sm_component_index = MCA_COMMON_SM_COMP_INDEX_POSIX;
|
||||
break;
|
||||
}
|
||||
else /* let the user know that we tried posix and failed */
|
||||
{
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("cannot select posix");
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sm rt test fail",
|
||||
1,
|
||||
"Posix");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else if (0 == strcasecmp(sm_argv[i], "mmap"))
|
||||
{
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("selecting mmap");
|
||||
/* there is no run-time test for mmap, so just select it */
|
||||
sm_component_index = MCA_COMMON_SM_COMP_INDEX_MMAP;
|
||||
break;
|
||||
}
|
||||
else if (0 == strcasecmp(sm_argv[i], "sysv"))
|
||||
{
|
||||
#if !MCA_COMMON_SM_SYSV
|
||||
if (!help_msg_displayed)
|
||||
{
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sm support",
|
||||
1,
|
||||
sm_argv[i]);
|
||||
help_msg_displayed = 1;
|
||||
}
|
||||
#else /* MCA_COMMON_SM_SYSV */
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("querying sysv");
|
||||
/* make sure that we can safely use sysv on this system */
|
||||
if (OMPI_SUCCESS == mca_common_sm_sysv_component_query())
|
||||
{
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("selecting sysv");
|
||||
sm_component_index = MCA_COMMON_SM_COMP_INDEX_SYSV;
|
||||
break;
|
||||
}
|
||||
else /* let the user know that we tried sysv and failed */
|
||||
{
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("cannot select sysv");
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sm rt test fail",
|
||||
1,
|
||||
"System V");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else /* unknown value */
|
||||
{
|
||||
if (!help_msg_displayed)
|
||||
{
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sm support",
|
||||
1,
|
||||
sm_argv[i]);
|
||||
help_msg_displayed = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (MCA_COMMON_SM_COMP_INDEX_NONE == sm_component_index)
|
||||
{
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("no component selected");
|
||||
}
|
||||
|
||||
return sm_component_index;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
int
|
||||
mca_common_sm_param_register(mca_base_component_t *c)
|
||||
{
|
||||
char sm_avail_help_str[OPAL_PATH_MAX];
|
||||
|
||||
/* also using sysv_index's value as an initialization flag */
|
||||
if (-1 == sysv_index)
|
||||
{
|
||||
if (MCA_COMMON_SM_SYSV)
|
||||
{
|
||||
snprintf(
|
||||
sm_avail_help_str,
|
||||
sizeof(sm_avail_help_str) - 1,
|
||||
"Which shared memory support will be used. "
|
||||
"Valid values: sysv,mmap - or a comma delimited "
|
||||
"combination of them (order dependent). The first component "
|
||||
"that is successfully selected is used."
|
||||
);
|
||||
/**
|
||||
* construct a comma-separated list of valid options for "all".
|
||||
* notice that we are going to try sysv first.
|
||||
*/
|
||||
snprintf(sm_all_buff, sizeof(sm_all_buff) - 1, "sysv,mmap");
|
||||
}
|
||||
else /* only mmap is available */
|
||||
{
|
||||
snprintf(
|
||||
sm_avail_help_str,
|
||||
sizeof(sm_avail_help_str) - 1,
|
||||
"Which shared memory support will be used. "
|
||||
"Valid values: mmap."
|
||||
);
|
||||
snprintf(sm_all_buff, sizeof(sm_all_buff) - 1, "mmap");
|
||||
}
|
||||
int i;
|
||||
char *last_char;
|
||||
char sm_avail_help_str[OPAL_PATH_MAX];
|
||||
|
||||
mca_base_param_reg_string_name("mpi",
|
||||
"common_sm",
|
||||
sm_avail_help_str,
|
||||
false,
|
||||
false,
|
||||
sm_params,
|
||||
&sm_params);
|
||||
memset(sm_default, '\0', sizeof(sm_default));
|
||||
|
||||
/* empty == try all available */
|
||||
if (0 == strcmp(sm_params, ""))
|
||||
/* populate sm_default with all available common sm component names */
|
||||
for (i = 0; NULL != sm_avail_table[i].sm_name; ++i)
|
||||
{
|
||||
if (NULL == (sm_argv = opal_argv_split(sm_all_buff, ',')))
|
||||
if (sm_avail_table[i].avail)
|
||||
{
|
||||
opal_output(0,
|
||||
"WARNING: could not parse mpi_common_sm request.");
|
||||
strncat(sm_default,
|
||||
sm_avail_table[i].sm_name,
|
||||
sizeof(sm_default) - 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
/* remove the last comma from the char buff */
|
||||
if (NULL != (last_char = strrchr(sm_default, ',')))
|
||||
{
|
||||
if (NULL == (sm_argv = opal_argv_split(sm_params, ',')))
|
||||
{
|
||||
opal_output(0,
|
||||
"WARNING: could not parse mpi_common_sm request.");
|
||||
}
|
||||
*last_char = '\0';
|
||||
}
|
||||
|
||||
/* set up help string */
|
||||
snprintf(
|
||||
sm_avail_help_str,
|
||||
sizeof(sm_avail_help_str) - 1,
|
||||
"Which shared memory support will be used. Valid values: (%s)%s",
|
||||
sm_default,
|
||||
(i > 1) ? " - or a comma delimited combination of them "
|
||||
"(order dependent). The first component that is successfully "
|
||||
"selected is used." : "."
|
||||
);
|
||||
/* register mpi_common_sm */
|
||||
common_sm_index = mca_base_param_reg_string_name("mpi",
|
||||
"common_sm",
|
||||
sm_avail_help_str,
|
||||
false,
|
||||
false,
|
||||
/* default value */
|
||||
sm_default,
|
||||
&sm_params);
|
||||
sysv_index = mca_base_param_reg_int_name(
|
||||
"mpi",
|
||||
"common_sm_have_sysv_support",
|
||||
@ -121,135 +344,255 @@ mca_common_sm_param_register(mca_base_component_t *c)
|
||||
MCA_COMMON_SM_SYSV,
|
||||
NULL
|
||||
);
|
||||
posix_index = mca_base_param_reg_int_name(
|
||||
"mpi",
|
||||
"common_sm_have_posix_support",
|
||||
"Whether shared memory has POSIX support or not",
|
||||
false,
|
||||
true,
|
||||
MCA_COMMON_SM_POSIX,
|
||||
NULL
|
||||
);
|
||||
}
|
||||
|
||||
/* Also register MCA param synonyms for the component */
|
||||
/* also register MCA param synonyms for the component */
|
||||
mca_base_param_reg_syn(sysv_index, c, "have_sysv_support", false);
|
||||
mca_base_param_reg_syn(posix_index, c, "have_posix_support", false);
|
||||
mca_base_param_reg_syn(common_sm_index, c, "store", false);
|
||||
|
||||
if (OPAL_SUCCESS != mca_base_param_lookup_string(common_sm_index,
|
||||
&sm_params))
|
||||
{
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* empty string == try all available */
|
||||
if (0 == strcmp(sm_params, ""))
|
||||
{
|
||||
if (NULL == (sm_argv = opal_argv_split(sm_default, ',')))
|
||||
{
|
||||
opal_output(0,
|
||||
"WARNING: could not parse mpi_common_sm request.");
|
||||
}
|
||||
}
|
||||
/* try what the user specified */
|
||||
else
|
||||
{
|
||||
if (NULL == (sm_argv = opal_argv_split(sm_params, ',')))
|
||||
{
|
||||
opal_output(0,
|
||||
"WARNING: could not parse mpi_common_sm request.");
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_init(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
size_t size,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
if (!initialized)
|
||||
{
|
||||
int help_msg_displayed = 0;
|
||||
int i;
|
||||
size_t num_local_procs = 0;
|
||||
bool found_lowest = false;
|
||||
bool lowest;
|
||||
size_t p;
|
||||
ompi_proc_t *temp_proc;
|
||||
|
||||
if (NULL != sm_argv)
|
||||
/**
|
||||
* NOTE: the selected component's init routine, unlike mca_common_sm_init,
|
||||
* must be provided with:
|
||||
* o a SORTED procs array
|
||||
* o the number of LOCAL processes within procs array
|
||||
*
|
||||
* so always do the following before calling sm_init:
|
||||
* o reorder procs array to have all the local procs at the beginning.
|
||||
* o look for the local proc with the lowest name.
|
||||
* o determine the number of local procs.
|
||||
* o ensure that procs[0] is the lowest named process.
|
||||
*/
|
||||
for (p = 0; p < num_procs; ++p)
|
||||
{
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags))
|
||||
{
|
||||
/* if we don't have a lowest, save the first one */
|
||||
if (!found_lowest)
|
||||
{
|
||||
procs[0] = procs[p];
|
||||
found_lowest = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* save this proc */
|
||||
procs[num_local_procs] = procs[p];
|
||||
/**
|
||||
* if we have a new lowest, swap it with position 0
|
||||
* so that procs[0] is always the lowest named proc
|
||||
*/
|
||||
if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
&(procs[p]->proc_name),
|
||||
&(procs[0]->proc_name)) < 0)
|
||||
{
|
||||
temp_proc = procs[0];
|
||||
procs[0] = procs[p];
|
||||
procs[num_local_procs] = temp_proc;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* iterate through the entire list
|
||||
* stop when a valid component has been selected.
|
||||
*
|
||||
* warn the user when an invalid option was specified,
|
||||
* but continue searching for a valid alternative.
|
||||
* regardless of the comparisons above, we found
|
||||
* another proc on the local node, so increment
|
||||
*/
|
||||
for (i = 0; NULL != sm_argv[i] && NULL == sm_init; ++i)
|
||||
{
|
||||
if (0 == strcasecmp(sm_argv[i], "mmap"))
|
||||
{
|
||||
#if !MCA_COMMON_SM_WINDOWS
|
||||
sm_init = mca_common_sm_mmap_init;
|
||||
sm_init_group = mca_common_sm_mmap_init_group;
|
||||
sm_seg_alloc = mca_common_sm_mmap_seg_alloc;
|
||||
sm_fini = mca_common_sm_mmap_fini;
|
||||
#else /* MCA_COMMON_SM_WINDOWS */
|
||||
sm_init = mca_common_sm_windows_init;
|
||||
sm_init_group = mca_common_sm_windows_init_group;
|
||||
sm_seg_alloc = mca_common_sm_windows_seg_alloc;
|
||||
sm_fini = mca_common_sm_windows_fini;
|
||||
#endif
|
||||
}
|
||||
else if (0 == strcasecmp(sm_argv[i], "sysv"))
|
||||
{
|
||||
#if !MCA_COMMON_SM_SYSV
|
||||
if (!help_msg_displayed)
|
||||
{
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sm support",
|
||||
1,
|
||||
sm_argv[i]);
|
||||
help_msg_displayed = 1;
|
||||
}
|
||||
#else /* MCA_COMMON_SM_SYSV */
|
||||
/* make sure that we can safely use sysv on this system */
|
||||
if (OMPI_SUCCESS == mca_common_sm_sysv_component_query())
|
||||
{
|
||||
sm_init = mca_common_sm_sysv_init;
|
||||
sm_init_group = mca_common_sm_sysv_init_group;
|
||||
sm_seg_alloc = mca_common_sm_sysv_seg_alloc;
|
||||
sm_fini = mca_common_sm_sysv_fini;
|
||||
}
|
||||
else /* let the user know that we tried sysv and failed */
|
||||
{
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sysv rt test fail",
|
||||
1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else /* unknown value */
|
||||
{
|
||||
if (!help_msg_displayed)
|
||||
{
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sm support",
|
||||
1,
|
||||
sm_argv[i]);
|
||||
help_msg_displayed = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (NULL != sm_argv)
|
||||
{
|
||||
opal_argv_free(sm_argv);
|
||||
}
|
||||
++num_local_procs;
|
||||
}
|
||||
initialized = 1;
|
||||
}
|
||||
|
||||
/* call the selected init function */
|
||||
/* if there is less than 2 local processes, there's nothing to do. */
|
||||
if (num_local_procs < 2)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!initialized)
|
||||
{
|
||||
mca_common_sm_rml_sm_info_t sm_info;
|
||||
sm_info.id = MCA_COMMON_SM_COMP_INDEX_NONE;
|
||||
memset(sm_info.posix_fname_buff,
|
||||
'\0',
|
||||
OMPI_COMMON_SM_POSIX_FILE_LEN_MAX);
|
||||
|
||||
lowest = (0 == orte_util_compare_name_fields(
|
||||
ORTE_NS_CMP_ALL,
|
||||
ORTE_PROC_MY_NAME,
|
||||
&(procs[0]->proc_name)));
|
||||
|
||||
/**
|
||||
* lock here to prevent multiple threads from invoking this function
|
||||
* simultaneously. the critical section we're protecting is usage of
|
||||
* the RML in this block.
|
||||
*/
|
||||
opal_mutex_lock(&mutex);
|
||||
|
||||
OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t);
|
||||
|
||||
/**
|
||||
* figure out if i am the lowest proc in the group.
|
||||
* if i am, select a common sm component and send its index to the rest
|
||||
* of the local procs so they can select the same common sm component.
|
||||
*/
|
||||
if (lowest)
|
||||
{
|
||||
/* get the component index */
|
||||
sm_info.id = query_sm_components();
|
||||
}
|
||||
/* no return code check here because the error
|
||||
* path is the same as the expected path */
|
||||
mca_common_sm_rml_info_bcast(&sm_info,
|
||||
procs,
|
||||
num_local_procs,
|
||||
OMPI_RML_TAG_COMMON_SM_COMP_INDEX,
|
||||
lowest,
|
||||
file_name,
|
||||
&(pending_rml_msgs));
|
||||
|
||||
opal_mutex_unlock(&mutex);
|
||||
select_common_sm_component(sm_info.id);
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
if (NULL != sm_init)
|
||||
{
|
||||
return sm_init(procs, num_procs, size,
|
||||
file_name, size_ctl_structure,
|
||||
/* notice that we are passing a SORTED procs array to the selected
|
||||
* component along with the number of LOCAL processes found within
|
||||
* procs.
|
||||
*/
|
||||
return sm_init(procs,
|
||||
num_local_procs,
|
||||
size,
|
||||
file_name,
|
||||
size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* This routine is the same as mca_common_sm_mmap_init() except that
|
||||
* it takes an (ompi_group_t *) parameter to specify the peers rather
|
||||
* than an array of procs. Unlike mca_common_sm_mmap_init(), the
|
||||
* group must contain *only* local peers, or this function will return
|
||||
* NULL and not create any shared memory segment.
|
||||
*/
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_init_group(ompi_group_t *group,
|
||||
size_t size,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
if (NULL != sm_init_group)
|
||||
mca_common_sm_module_t *ret = NULL;
|
||||
ompi_proc_t **procs = NULL;
|
||||
|
||||
/* make sure sm_init has been properly initialized. do this because
|
||||
* sm_init_group only does prep work before passing along the real work to
|
||||
* sm_init.
|
||||
*/
|
||||
if (NULL != sm_init)
|
||||
{
|
||||
return sm_init_group(group, size,
|
||||
file_name, size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
size_t i;
|
||||
size_t group_size;
|
||||
ompi_proc_t *proc;
|
||||
|
||||
/* if there is less than 2 procs, there's nothing to do */
|
||||
if ((group_size = ompi_group_size(group)) < 2)
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
if (NULL == (procs = (ompi_proc_t **)
|
||||
malloc(sizeof(ompi_proc_t *) * group_size)))
|
||||
|
||||
{
|
||||
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
goto out;
|
||||
}
|
||||
/* make sure that all the procs in the group are local */
|
||||
for (i = 0; i < group_size; ++i)
|
||||
{
|
||||
proc = ompi_group_peer_lookup(group, i);
|
||||
if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags))
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
procs[i] = proc;
|
||||
}
|
||||
/* let sm_init take care of the rest ... */
|
||||
ret = sm_init(procs,
|
||||
group_size,
|
||||
size,
|
||||
file_name,
|
||||
size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
}
|
||||
return NULL;
|
||||
|
||||
out:
|
||||
if (NULL != procs)
|
||||
{
|
||||
free(procs);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
void *
|
||||
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t* mpool,
|
||||
size_t* size,
|
||||
mca_mpool_base_registration_t** registration)
|
||||
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t *size,
|
||||
mca_mpool_base_registration_t **registration)
|
||||
{
|
||||
if (NULL != sm_seg_alloc)
|
||||
if (NULL != sm_seg_alloc)
|
||||
{
|
||||
return sm_seg_alloc(mpool, size, registration);
|
||||
}
|
||||
@ -257,10 +600,15 @@ mca_common_sm_seg_alloc(struct mca_mpool_base_module_t* mpool,
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
int
|
||||
int
|
||||
mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module)
|
||||
{
|
||||
if (NULL != sm_fini)
|
||||
if (NULL != sm_argv)
|
||||
{
|
||||
opal_argv_free(sm_argv);
|
||||
sm_argv = NULL;
|
||||
}
|
||||
if (NULL != sm_fini)
|
||||
{
|
||||
return sm_fini(mca_common_sm_module);
|
||||
}
|
||||
|
@ -5,17 +5,17 @@
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
@ -28,27 +28,48 @@
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/group/group.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
|
||||
#define MCA_COMMON_SM_OUTPUT_VERBOSE(msg) \
|
||||
opal_output_verbose(100, \
|
||||
mca_btl_base_output, \
|
||||
"mca: common: sm: %s", msg);
|
||||
|
||||
/* posix sm file name length max. on some systems shm_open's file name limit
|
||||
* is pretty low (32 chars, for instance ). 16 is plenty for our needs, but
|
||||
* extra work on our end is needed to ensure things work properly. if a
|
||||
* system's limit is lower than OMPI_COMMON_SM_POSIX_FILE_LEN_MAX, then the
|
||||
* run-time test will catch that fact and posix sm will be disqualified. see
|
||||
* comments regarding this in common_sm_posix.c.
|
||||
*/
|
||||
#define OMPI_COMMON_SM_POSIX_FILE_LEN_MAX 16
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_mpool_base_module_t;
|
||||
|
||||
typedef struct mca_common_sm_seg_header_t
|
||||
typedef struct mca_common_sm_seg_header_t
|
||||
{
|
||||
/* lock to control atomic access */
|
||||
opal_atomic_lock_t seg_lock;
|
||||
/* is the segment ready for use */
|
||||
volatile int32_t seg_inited;
|
||||
/**
|
||||
* number of local processes that are
|
||||
* attached to the shared memory segment
|
||||
*/
|
||||
volatile int32_t seg_att;
|
||||
/* offset to next available memory location available for allocation */
|
||||
size_t seg_offset;
|
||||
/* total size of the segment */
|
||||
size_t seg_size;
|
||||
} mca_common_sm_seg_header_t;
|
||||
|
||||
typedef struct mca_common_sm_module_t
|
||||
typedef struct mca_common_sm_module_t
|
||||
{
|
||||
/* double link list element */
|
||||
opal_list_item_t module_item;
|
||||
@ -75,7 +96,7 @@ mca_common_sm_param_register(mca_base_component_t *c);
|
||||
/**
|
||||
* Register the MCA parameters for common sm.
|
||||
*/
|
||||
int
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_param_register(mca_base_component_t *c);
|
||||
|
||||
/**
|
||||
@ -84,7 +105,7 @@ mca_common_sm_param_register(mca_base_component_t *c);
|
||||
* the shared memory segment does not exist before any of the current
|
||||
* set of processes try and open it.
|
||||
*
|
||||
* @param procs - array of (ompi_proc_t*)'s to create this shared
|
||||
* @param procs - array of (ompi_proc_t *)'s to create this shared
|
||||
* memory segment for. This array must be writable; it may be edited
|
||||
* (in undefined ways) if the array contains procs that are not on
|
||||
* this host. It is assumed that the caller will simply free this
|
||||
@ -102,9 +123,9 @@ mca_common_sm_param_register(mca_base_component_t *c);
|
||||
* as its first segment (IN)
|
||||
*
|
||||
* @param data_set_alignment alignment of the data segment. this
|
||||
* follows the control structure. If this
|
||||
* value if 0, then assume that there will
|
||||
* be no data segment following the control
|
||||
* follows the control structure. If this
|
||||
* value if 0, then assume that there will
|
||||
* be no data segment following the control
|
||||
* structure. (IN)
|
||||
*
|
||||
* @returnvalue pointer to control structure at head of shared memory segment.
|
||||
@ -112,73 +133,73 @@ mca_common_sm_param_register(mca_base_component_t *c);
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_init(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
size_t size,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
typedef mca_common_sm_module_t *
|
||||
(*mca_common_sm_init_fn_t)(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
size_t size,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/**
|
||||
* This routine is used to set up a shared memory segment (whether
|
||||
* it's an mmaped file or a SYSV IPC segment). It is assumed that
|
||||
* the shared memory segment does not exist before any of the current
|
||||
* set of processes try and open it.
|
||||
* This routine is used to set up a shared memory segment (whether
|
||||
* it's an mmaped file or a SYSV IPC segment). It is assumed that
|
||||
* the shared memory segment does not exist before any of the current
|
||||
* set of processes try and open it.
|
||||
*
|
||||
* This routine is the same as mca_common_sm_mmap_init() except that
|
||||
* it takes an (ompi_group_t*) parameter to specify the peers rather
|
||||
* it takes an (ompi_group_t *) parameter to specify the peers rather
|
||||
* than an array of procs. Unlike mca_common_sm_mmap_init(), the
|
||||
* group must contain *only* local peers, or this function will return
|
||||
* NULL and not create any shared memory segment.
|
||||
*/
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_init_group(ompi_group_t *group,
|
||||
size_t size,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
typedef mca_common_sm_module_t *
|
||||
(*mca_common_sm_init_group_fn_t)(ompi_group_t *group,
|
||||
size_t size,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/**
|
||||
* callback from the sm mpool
|
||||
*/
|
||||
OMPI_DECLSPEC extern void *
|
||||
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t* size,
|
||||
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t* size,
|
||||
mca_mpool_base_registration_t **registration);
|
||||
|
||||
typedef void *
|
||||
(*mca_common_sm_seg_alloc_fn_t)(struct mca_mpool_base_module_t *mpool,
|
||||
size_t* size,
|
||||
(*mca_common_sm_seg_alloc_fn_t)(struct mca_mpool_base_module_t *mpool,
|
||||
size_t *size,
|
||||
mca_mpool_base_registration_t **registration);
|
||||
|
||||
/**
|
||||
* This function will release all local resources attached to the
|
||||
* shared memory segment. We assume that the operating system will
|
||||
* shared memory segment. We assume that the operating system will
|
||||
* release the memory resources when the last process release it.
|
||||
*
|
||||
* @param mca_common_sm_module - instance that is shared between
|
||||
* @param mca_common_sm_module - instance that is shared between
|
||||
* components that use shared memory.
|
||||
*
|
||||
* @returnvalue 0 if everything was OK, otherwise a negative value.
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC extern int
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module);
|
||||
|
||||
typedef int
|
||||
typedef int
|
||||
(*mca_common_sm_fini_fn_t)(mca_common_sm_module_t *mca_common_sm_module);
|
||||
|
||||
/*
|
||||
|
@ -60,6 +60,8 @@
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
#include "ompi/mca/mpool/sm/mpool_sm.h"
|
||||
|
||||
#include "common_sm_rml.h"
|
||||
#include "common_sm_mmap.h"
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
@ -69,6 +71,13 @@ OBJ_CLASS_INSTANCE(
|
||||
NULL
|
||||
);
|
||||
|
||||
/**
|
||||
* list of RML messages that have arrived that have not yet been
|
||||
* consumed by the thread who is looking to complete its component
|
||||
* initialization based on the contents of the RML message.
|
||||
*/
|
||||
static opal_list_t pending_rml_msgs;
|
||||
static bool pending_rml_msgs_init = false;
|
||||
|
||||
/*
|
||||
* Lock to protect multiple instances of mmap_init() from being
|
||||
@ -76,24 +85,10 @@ OBJ_CLASS_INSTANCE(
|
||||
*/
|
||||
static opal_mutex_t mutex;
|
||||
|
||||
/*
|
||||
* List of RML messages that have arrived that have not yet been
|
||||
* consumed by the thread who is looking to attach to the backing file
|
||||
* that the RML message corresponds to.
|
||||
/**
|
||||
* shared memory information used for initialization and setup.
|
||||
*/
|
||||
static opal_list_t pending_rml_msgs;
|
||||
static bool pending_rml_msgs_init = false;
|
||||
|
||||
/*
|
||||
* Items on the pending_rml_msgs list
|
||||
*/
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
char file_name[OPAL_PATH_MAX];
|
||||
int sm_file_inited;
|
||||
} pending_mmap_rml_msg_t;
|
||||
|
||||
OBJ_CLASS_INSTANCE(pending_mmap_rml_msg_t, opal_list_item_t, NULL, NULL);
|
||||
static mca_common_sm_rml_sm_info_t sm_info;
|
||||
|
||||
static mca_common_sm_module_mmap_t *
|
||||
create_map(int fd, size_t size,
|
||||
@ -146,6 +141,10 @@ create_map(int fd, size_t size,
|
||||
map->super.module_seg_addr = (unsigned char *)seg;
|
||||
map->super.module_size = size;
|
||||
|
||||
/* map object successful initialized - we can safely increment seg_att */
|
||||
opal_atomic_wmb();
|
||||
opal_atomic_add_32(&map->super.module_seg->seg_att, 1);
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
@ -160,82 +159,48 @@ mca_common_sm_mmap_component_query(void)
|
||||
}
|
||||
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_mmap_init(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
mca_common_sm_mmap_init(ompi_proc_t **sorted_procs,
|
||||
size_t num_loc_procs,
|
||||
size_t size, char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
int fd = -1;
|
||||
bool lowest;
|
||||
mca_common_sm_module_mmap_t *map = NULL;
|
||||
size_t mem_offset, p;
|
||||
int rc = 0, sm_file_inited = 0, num_local_procs;
|
||||
struct iovec iov[3];
|
||||
int sm_file_created = OMPI_RML_TAG_SM_BACK_FILE_CREATED;
|
||||
char filename_to_send[OPAL_PATH_MAX];
|
||||
opal_list_item_t *item;
|
||||
pending_mmap_rml_msg_t *rml_msg;
|
||||
ompi_proc_t *temp_proc;
|
||||
bool found_lowest = false;
|
||||
size_t mem_offset;
|
||||
int num_local_procs;
|
||||
|
||||
/* Reorder all procs array to have all the local procs at the
|
||||
beginning. Simultaneously look for the local proc with the
|
||||
lowest name. Ensure that procs[0] is the lowest named
|
||||
process. */
|
||||
for (num_local_procs = p = 0; p < num_procs; p++) {
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags)) {
|
||||
/* If we don't have a lowest, save the first one */
|
||||
if (!found_lowest) {
|
||||
procs[0] = procs[p];
|
||||
found_lowest = true;
|
||||
} else {
|
||||
/* Save this proc */
|
||||
procs[num_local_procs] = procs[p];
|
||||
/* If we have a new lowest, swap it with position 0 so
|
||||
that procs[0] is always the lowest named proc */
|
||||
if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
&(procs[p]->proc_name),
|
||||
&(procs[0]->proc_name)) < 0) {
|
||||
temp_proc = procs[0];
|
||||
procs[0] = procs[p];
|
||||
procs[num_local_procs] = temp_proc;
|
||||
}
|
||||
}
|
||||
/* Regardless of the comparisons above, we found another
|
||||
proc on the local node, so increment */
|
||||
++num_local_procs;
|
||||
}
|
||||
}
|
||||
/* If there's no local procs, there's nothing to do */
|
||||
if (0 == num_local_procs) {
|
||||
return NULL;
|
||||
}
|
||||
num_procs = num_local_procs;
|
||||
lowest = (0 == orte_util_compare_name_fields(
|
||||
ORTE_NS_CMP_ALL,
|
||||
ORTE_PROC_MY_NAME,
|
||||
&(sorted_procs[0]->proc_name)));
|
||||
|
||||
iov[0].iov_base = &sm_file_created;
|
||||
iov[0].iov_len = sizeof(sm_file_created);
|
||||
memset(filename_to_send, 0, sizeof(filename_to_send));
|
||||
strncpy(filename_to_send, file_name, sizeof(filename_to_send) - 1);
|
||||
iov[1].iov_base = filename_to_send;
|
||||
iov[1].iov_len = sizeof(filename_to_send);
|
||||
iov[2].iov_base = &sm_file_inited;
|
||||
iov[2].iov_len = sizeof(sm_file_inited);
|
||||
/* using sm_info.id as an initialization marker:
|
||||
* o 0 -> not initialized; 1 -> initialized
|
||||
*/
|
||||
sm_info.id = 0;
|
||||
memset(sm_info.posix_fname_buff, '\0', OMPI_COMMON_SM_POSIX_FILE_LEN_MAX);
|
||||
/**
|
||||
* remember that this function was passed
|
||||
* a sorted procs array and a local proc count.
|
||||
*/
|
||||
num_local_procs = num_loc_procs;
|
||||
|
||||
/* Lock here to prevent multiple threads from invoking this
|
||||
function simultaneously. The critical section we're protecting
|
||||
is usage of the RML in this block. */
|
||||
opal_mutex_lock(&mutex);
|
||||
|
||||
if (!pending_rml_msgs_init) {
|
||||
if (!pending_rml_msgs_init)
|
||||
{
|
||||
OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t);
|
||||
pending_rml_msgs_init = true;
|
||||
}
|
||||
|
||||
/* Figure out if I am the lowest rank in the group. If so, I will
|
||||
create the shared file. */
|
||||
if (0 == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
ORTE_PROC_MY_NAME,
|
||||
&(procs[0]->proc_name))) {
|
||||
if (lowest) {
|
||||
/* check, whether the specified filename is on a network file system */
|
||||
if (opal_path_nfs(file_name)) {
|
||||
orte_show_help("help-mpi-common-sm.txt", "mmap on nfs", 1,
|
||||
@ -262,7 +227,7 @@ mca_common_sm_mmap_init(ompi_proc_t **procs,
|
||||
map = create_map(fd, size, file_name, size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
if (map != NULL) {
|
||||
sm_file_inited = 1;
|
||||
sm_info.id = 1;
|
||||
|
||||
/* initialize the segment - only the first process
|
||||
to open the file */
|
||||
@ -279,86 +244,38 @@ mca_common_sm_mmap_init(ompi_proc_t **procs,
|
||||
fd = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Signal the rest of the local procs that the backing file
|
||||
has been created. Bump up the libevent polling frequency
|
||||
while we're using the RML. */
|
||||
opal_progress_event_users_increment();
|
||||
for (p = 1; p < num_procs; p++) {
|
||||
rc = orte_rml.send(&(procs[p]->proc_name), iov, 3,
|
||||
OMPI_RML_TAG_SM_BACK_FILE_CREATED, 0);
|
||||
if (rc < (ssize_t) (iov[0].iov_len + iov[1].iov_len + iov[2].iov_len)) {
|
||||
ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
|
||||
opal_progress_event_users_decrement();
|
||||
/* Signal the rest of the local procs that the backing file
|
||||
has been created. */
|
||||
if (OMPI_SUCCESS != mca_common_sm_rml_info_bcast(
|
||||
&sm_info,
|
||||
sorted_procs,
|
||||
num_local_procs,
|
||||
OMPI_RML_TAG_SM_BACK_FILE_CREATED,
|
||||
lowest,
|
||||
file_name,
|
||||
&(pending_rml_msgs))) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Free it all -- bad things are going to happen */
|
||||
if (1 == sm_file_inited) {
|
||||
munmap(map->super.module_seg_addr, size);
|
||||
close(fd);
|
||||
unlink(file_name);
|
||||
fd = -1;
|
||||
}
|
||||
goto out;
|
||||
if (lowest)
|
||||
{
|
||||
if (1 == sm_info.id)
|
||||
{
|
||||
/* wait until all other local procs have reported in */
|
||||
while (num_local_procs > map->super.module_seg->seg_att);
|
||||
{
|
||||
opal_atomic_rmb();
|
||||
}
|
||||
/**
|
||||
* all other local procs reported in, so it's safe to unlink
|
||||
*/
|
||||
unlink(file_name);
|
||||
}
|
||||
opal_progress_event_users_decrement();
|
||||
} else {
|
||||
/* All other procs wait for the file to be initialized before
|
||||
using the backing file. However, since these shared
|
||||
backing files may be created simultaneously in multiple
|
||||
threads, the RML messages may arrive in any order. So
|
||||
first check to see if we previously received a message for
|
||||
me. */
|
||||
for (item = opal_list_get_first(&pending_rml_msgs);
|
||||
opal_list_get_end(&pending_rml_msgs) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
rml_msg = (pending_mmap_rml_msg_t*) item;
|
||||
if (0 == strcmp(rml_msg->file_name, file_name)) {
|
||||
opal_list_remove_item(&pending_rml_msgs, item);
|
||||
sm_file_inited = rml_msg->sm_file_inited;
|
||||
OBJ_RELEASE(item);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't find a message already waiting, block on
|
||||
receiving from the RML. */
|
||||
if (opal_list_get_end(&pending_rml_msgs) == item) {
|
||||
while (1) {
|
||||
/* Bump up the libevent polling frequency while we're
|
||||
in this RML recv, just to ensure we're checking
|
||||
libevent frequently. */
|
||||
opal_progress_event_users_increment();
|
||||
rc = orte_rml.recv(&(procs[0]->proc_name), iov, 3,
|
||||
OMPI_RML_TAG_SM_BACK_FILE_CREATED, 0);
|
||||
opal_progress_event_users_decrement();
|
||||
if (rc < 0) {
|
||||
ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
|
||||
/* fd/map wasn't opened here; no need to close/reset */
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Was the message for me? If so, we're done */
|
||||
if (0 == strcmp(filename_to_send, file_name)) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* If not, put it on the pending list and try again */
|
||||
rml_msg = OBJ_NEW(pending_mmap_rml_msg_t);
|
||||
if (NULL == rml_msg) {
|
||||
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
/* fd/map wasn't opened here; no need to close/reset */
|
||||
goto out;
|
||||
}
|
||||
memcpy(rml_msg->file_name, filename_to_send,
|
||||
sizeof(rml_msg->file_name));
|
||||
rml_msg->sm_file_inited = sm_file_inited;
|
||||
opal_list_append(&pending_rml_msgs, &(rml_msg->super));
|
||||
}
|
||||
}
|
||||
|
||||
/* check to see if file inited correctly */
|
||||
if (sm_file_inited != 0) {
|
||||
/* check to see if file initialized correctly */
|
||||
if (sm_info.id != 0) {
|
||||
fd = open(file_name, O_RDWR, 0600);
|
||||
|
||||
if (fd != -1) {
|
||||
@ -378,44 +295,6 @@ out:
|
||||
return &(map->super);
|
||||
}
|
||||
|
||||
/*
|
||||
* Same as mca_common_sm_mmap_init(), but takes an (ompi_group_t*)
|
||||
* argument instead of na array of ompi_proc_t's.
|
||||
*
|
||||
* This function just checks the group to ensure that all the procs
|
||||
* are local, and if they are, calls mca_common_sm_mmap_init().
|
||||
*/
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_mmap_init_group(ompi_group_t *group,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
size_t i, group_size;
|
||||
ompi_proc_t *proc, **procs;
|
||||
mca_common_sm_module_t *ret;
|
||||
|
||||
group_size = ompi_group_size(group);
|
||||
procs = (ompi_proc_t**) malloc(sizeof(ompi_proc_t*) * group_size);
|
||||
if (NULL == procs) {
|
||||
return NULL;
|
||||
}
|
||||
for (i = 0; i < group_size; ++i) {
|
||||
proc = ompi_group_peer_lookup(group,i);
|
||||
if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
|
||||
free(procs);
|
||||
return NULL;
|
||||
}
|
||||
procs[i] = proc;
|
||||
}
|
||||
|
||||
ret = mca_common_sm_mmap_init(procs, group_size, size, file_name,
|
||||
size_ctl_structure, data_seg_alignment);
|
||||
free(procs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
mca_common_sm_mmap_fini(mca_common_sm_module_t *mca_common_sm_module)
|
||||
{
|
||||
|
@ -5,17 +5,17 @@
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
@ -27,16 +27,15 @@
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/group/group.h"
|
||||
#include "ompi/mca/common/sm/common_sm.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_mpool_base_module_t;
|
||||
|
||||
typedef struct mca_common_sm_module_mmap_t
|
||||
typedef struct mca_common_sm_module_mmap_t
|
||||
{
|
||||
mca_common_sm_module_t super;
|
||||
} mca_common_sm_module_mmap_t;
|
||||
@ -49,13 +48,15 @@ OBJ_CLASS_DECLARATION(mca_common_sm_module_mmap_t);
|
||||
* exist before any of the current set of processes try and open
|
||||
* it.
|
||||
*
|
||||
* @param procs - array of (ompi_proc_t*)'s to create this shared
|
||||
* memory segment for. This array must be writable; it may be edited
|
||||
* (in undefined ways) if the array contains procs that are not on
|
||||
* this host. It is assumed that the caller will simply free this
|
||||
* array upon return. (INOUT)
|
||||
* @param sorted_procs - array of (ompi_proc_t *)'s to create this shared memory
|
||||
* segment for. this routine, unlike the top-level
|
||||
* mca_common_sm_init routine, assumes that sorted_procs
|
||||
* is in the following state: all the local procs at the
|
||||
* beginning; sorted_procs[0] is the lowest named process.
|
||||
* (IN)
|
||||
*
|
||||
* @param num_procs - length of the procs array (IN)
|
||||
* @param num_loc_procs - number of local procs contained within sorted_procs
|
||||
* (IN)
|
||||
*
|
||||
* @param size - size of the file, in bytes (IN)
|
||||
*
|
||||
@ -67,46 +68,27 @@ OBJ_CLASS_DECLARATION(mca_common_sm_module_mmap_t);
|
||||
* as its first segment (IN)
|
||||
*
|
||||
* @param data_set_alignment alignment of the data segment. this
|
||||
* follows the control structure. If this
|
||||
* value if 0, then assume that there will
|
||||
* be no data segment following the control
|
||||
* follows the control structure. If this
|
||||
* value if 0, then assume that there will
|
||||
* be no data segment following the control
|
||||
* structure. (IN)
|
||||
*
|
||||
* @return value pointer to control structure at head of file.
|
||||
*/
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_mmap_init(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
size_t size,
|
||||
mca_common_sm_mmap_init(ompi_proc_t **sorted_procs,
|
||||
size_t num_loc_procs,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/**
|
||||
* This routine is used to set up a shared memory file, backed
|
||||
* by a specified file. It is assumed that the file does not
|
||||
* exist before any of the current set of processes try and open
|
||||
* it.
|
||||
*
|
||||
* This routine is the same as mca_common_sm_mmap_init() except that
|
||||
* it takes an (ompi_group_t*) parameter to specify the peers rather
|
||||
* than an array of procs. Unlike mca_common_sm_mmap_init(), the
|
||||
* group must contain *only* local peers, or this function will return
|
||||
* NULL and not create any shared memory segment.
|
||||
*/
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_mmap_init_group(ompi_group_t *group,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/*
|
||||
* Callback from the sm mpool
|
||||
*/
|
||||
OMPI_DECLSPEC extern void *
|
||||
mca_common_sm_mmap_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t *size,
|
||||
mca_common_sm_mmap_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t *size,
|
||||
mca_mpool_base_registration_t **registration);
|
||||
|
||||
/**
|
||||
@ -119,13 +101,13 @@ mca_common_sm_mmap_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
* @returnvalue 0 if everything was OK, otherwise a negative value.
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC extern int
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_mmap_fini(mca_common_sm_module_t *mca_common_sm_module);
|
||||
|
||||
/**
|
||||
* component query routine
|
||||
*/
|
||||
OMPI_DECLSPEC extern int
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_mmap_component_query(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
504
ompi/mca/common/sm/common_sm_posix.c
Обычный файл
504
ompi/mca/common/sm/common_sm_posix.c
Обычный файл
@ -0,0 +1,504 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif /* HAVE_FCNTL_H */
|
||||
#ifdef HAVE_TIME_H
|
||||
#include <time.h>
|
||||
#endif /* HAVE_TIME_H */
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif /* HAVE_SYS_STAT_H */
|
||||
#ifdef HAVE_SYS_MMAN_H
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/align.h"
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/util/opal_sos.h"
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
#include "ompi/mca/mpool/sm/mpool_sm.h"
|
||||
|
||||
#include "common_sm_rml.h"
|
||||
#include "common_sm_posix.h"
|
||||
|
||||
/* max number of attempts to find an available
|
||||
* shm_open file name. see comments below.
|
||||
*/
|
||||
#define OMPI_COMMON_SM_POSIX_MAX_ATTEMPTS 64
|
||||
/* need the / for Solaris 10 and others, i'm sure */
|
||||
#define OMPI_COMMON_SM_POSIX_FILE_NAME_PREFIX "/open_mpi."
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_common_sm_module_posix_t,
|
||||
opal_object_t,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
|
||||
/**
|
||||
* lock to protect multiple instances of posix_init() from being
|
||||
* invoked simultaneously (because of rml usage).
|
||||
*/
|
||||
static opal_mutex_t mutex;
|
||||
|
||||
/**
|
||||
* shared memory information used for initialization and setup.
|
||||
*/
|
||||
static mca_common_sm_rml_sm_info_t sm_info;
|
||||
|
||||
/**
|
||||
* list of RML messages that have arrived that have not yet been
|
||||
* consumed by the thread who is looking to complete its component
|
||||
* initialization based on the contents of the RML message.
|
||||
*/
|
||||
static opal_list_t pending_rml_msgs;
|
||||
static bool pending_rml_msgs_init = false;
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* this routine searches for an available shm_open file name.
|
||||
*
|
||||
* @return if successful, a non-negative file descriptor is returned and
|
||||
* posix_file_name_buff will contain the file name associated with the
|
||||
* successful shm_open. otherwise, -1 is returned and the contents of
|
||||
* posix_file_name_buff is undefined.
|
||||
*/
|
||||
static int
|
||||
posix_shm_open(char *posix_file_name_buff, size_t size)
|
||||
{
|
||||
int attempt = 0;
|
||||
int fd = -1;
|
||||
/* format: /open_mpi.nnnn
|
||||
* see comment in common_sm.h that explains
|
||||
* why we chose to do things this way.
|
||||
*/
|
||||
snprintf(posix_file_name_buff,
|
||||
size,
|
||||
"%s%04d",
|
||||
OMPI_COMMON_SM_POSIX_FILE_NAME_PREFIX,
|
||||
attempt++);
|
||||
/**
|
||||
* workaround for simultaneous posix shm_opens on the same node (e.g.
|
||||
* multiple Open MPI jobs sharing a node). name collision during
|
||||
* component runtime will happen, so protect against it.
|
||||
*/
|
||||
while (attempt < OMPI_COMMON_SM_POSIX_MAX_ATTEMPTS)
|
||||
{
|
||||
/* the check for the existence of the object and its
|
||||
* creation if it does not exist are performed atomically.
|
||||
*/
|
||||
if ((fd = shm_open(posix_file_name_buff,
|
||||
O_CREAT | O_EXCL | O_RDWR,
|
||||
0600)) < 0)
|
||||
{
|
||||
int err = errno;
|
||||
if (EEXIST == err)
|
||||
{
|
||||
/* try again with a different name */
|
||||
snprintf(posix_file_name_buff,
|
||||
size,
|
||||
"%s%04d",
|
||||
OMPI_COMMON_SM_POSIX_FILE_NAME_PREFIX,
|
||||
attempt++);
|
||||
continue;
|
||||
}
|
||||
else /* a "real" error occurred, notify the user and return -1 */
|
||||
{
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sys call fail",
|
||||
1,
|
||||
orte_process_info.nodename,
|
||||
"shm_open(2)",
|
||||
posix_file_name_buff,
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
strerror(err),
|
||||
err);
|
||||
fd = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else /* success! */
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (OMPI_COMMON_SM_POSIX_MAX_ATTEMPTS <= attempt)
|
||||
{
|
||||
MCA_COMMON_SM_OUTPUT_VERBOSE("max attempts exceeded: could not find an "
|
||||
"available posix shared object file name");
|
||||
}
|
||||
return fd;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
static mca_common_sm_module_posix_t *
|
||||
create_map(int fd,
|
||||
size_t size,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
unsigned char *addr = NULL;
|
||||
mca_common_sm_module_posix_t *map = NULL;
|
||||
mca_common_sm_seg_header_t *seg = NULL;
|
||||
|
||||
/* map the file and initialize segment state */
|
||||
if (MAP_FAILED == (seg = (mca_common_sm_seg_header_t *)
|
||||
mmap(NULL,
|
||||
size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED,
|
||||
fd,
|
||||
0)))
|
||||
{
|
||||
int err = errno;
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sys call fail",
|
||||
1,
|
||||
orte_process_info.nodename,
|
||||
"mmap(2)",
|
||||
"",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
strerror(err),
|
||||
err);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* set up the map object */
|
||||
map = OBJ_NEW(mca_common_sm_module_posix_t);
|
||||
|
||||
/**
|
||||
* the first entry in the file is the control structure. the first entry
|
||||
* in the control structure is an mca_common_sm_seg_header_t element
|
||||
*/
|
||||
map->super.module_seg = seg;
|
||||
|
||||
addr = ((unsigned char *)seg) + size_ctl_structure;
|
||||
/**
|
||||
* if we have a data segment (i.e., if 0 != data_seg_alignment),
|
||||
* then make it the first aligned address after the control
|
||||
* structure. if this happens, this is a programming error in
|
||||
* Open MPI!
|
||||
*/
|
||||
if (0 != data_seg_alignment)
|
||||
{
|
||||
addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *);
|
||||
|
||||
/* is addr past end of shared memory object ? */
|
||||
if ((unsigned char *)seg + size < addr)
|
||||
{
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"mmap too small",
|
||||
1,
|
||||
orte_process_info.nodename,
|
||||
(unsigned long)size,
|
||||
(unsigned long)size_ctl_structure,
|
||||
(unsigned long)data_seg_alignment);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
map->super.module_data_addr = addr;
|
||||
map->super.module_seg_addr = (unsigned char *)seg;
|
||||
map->super.module_size = size;
|
||||
|
||||
/* map object successful initialized - we can safely increment seg_att */
|
||||
opal_atomic_wmb();
|
||||
opal_atomic_add_32(&map->super.module_seg->seg_att, 1);
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* this routine performs the posix sm component run-time test.
|
||||
*
|
||||
* @return OMPI_SUCCESS if posix sm can be used, OMPI_ERR_NOT_SUPPORTED
|
||||
* otherwise.
|
||||
*/
|
||||
int
|
||||
mca_common_sm_posix_component_query(void)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
int fd = -1;
|
||||
|
||||
if (-1 == (fd = posix_shm_open(sm_info.posix_fname_buff,
|
||||
(OMPI_COMMON_SM_POSIX_FILE_LEN_MAX - 1))))
|
||||
{
|
||||
rc = OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
if (-1 != fd)
|
||||
{
|
||||
shm_unlink(sm_info.posix_fname_buff);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* this routine assumes that sorted_procs is in the following state:
|
||||
* o all the local procs at the beginning.
|
||||
* o sorted_procs[0] is the lowest named process.
|
||||
*/
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_posix_init(ompi_proc_t **sorted_procs,
|
||||
size_t num_local_procs,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
int fd = -1;
|
||||
mca_common_sm_module_posix_t *map = NULL;
|
||||
bool lowest;
|
||||
size_t mem_offset;
|
||||
int n_local_procs;
|
||||
|
||||
lowest = (0 == orte_util_compare_name_fields(
|
||||
ORTE_NS_CMP_ALL,
|
||||
ORTE_PROC_MY_NAME,
|
||||
&(sorted_procs[0]->proc_name)));
|
||||
|
||||
/* using sm_info.id as an initialization marker:
|
||||
* o 0 -> not initialized; 1 -> initialized
|
||||
*/
|
||||
sm_info.id = 0;
|
||||
memset(sm_info.posix_fname_buff, '\0', OMPI_COMMON_SM_POSIX_FILE_LEN_MAX);
|
||||
|
||||
/**
|
||||
* lock here to prevent multiple threads from invoking this function
|
||||
* simultaneously. the critical section we're protecting is usage of
|
||||
* the RML in this block.
|
||||
*/
|
||||
opal_mutex_lock(&mutex);
|
||||
|
||||
if (!pending_rml_msgs_init)
|
||||
{
|
||||
OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t);
|
||||
pending_rml_msgs_init = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* figure out if i am the lowest proc in the group.
|
||||
* if i am, initialize the shared memory object.
|
||||
*/
|
||||
if (lowest)
|
||||
{
|
||||
/* initialize POSIX shared memory object */
|
||||
if (-1 == (fd = posix_shm_open(sm_info.posix_fname_buff,
|
||||
(OMPI_COMMON_SM_POSIX_FILE_LEN_MAX - 1))))
|
||||
{
|
||||
/* do nothing. if a real error occurred or the file name search
|
||||
* limit was reached, posix_shm_open will take care of the
|
||||
* notification part.
|
||||
*/
|
||||
;
|
||||
}
|
||||
else if (0 != ftruncate(fd, size))
|
||||
{
|
||||
int err = errno;
|
||||
orte_show_help("help-mpi-common-sm.txt",
|
||||
"sys call fail",
|
||||
1,
|
||||
orte_process_info.nodename,
|
||||
"ftruncate(2)",
|
||||
"",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
strerror(err),
|
||||
err);
|
||||
shm_unlink(sm_info.posix_fname_buff);
|
||||
}
|
||||
else
|
||||
{
|
||||
map = create_map(fd,
|
||||
size,
|
||||
size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
if (NULL != map)
|
||||
{
|
||||
sm_info.id = 1;
|
||||
/* initialize the segment */
|
||||
mem_offset =
|
||||
map->super.module_data_addr -
|
||||
(unsigned char *)map->super.module_seg;
|
||||
map->super.module_seg->seg_offset = mem_offset;
|
||||
map->super.module_seg->seg_size = size - mem_offset;
|
||||
#if 0 /* i don't think this unlock is needed, but it's in mmap's source */
|
||||
opal_atomic_unlock(&map->super.module_seg->seg_lock);
|
||||
#endif
|
||||
map->super.module_seg->seg_inited = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
shm_unlink(sm_info.posix_fname_buff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* signal the rest of the local procs that a
|
||||
* new shared memory object has been created.
|
||||
*/
|
||||
if (OMPI_SUCCESS != mca_common_sm_rml_info_bcast(
|
||||
&sm_info,
|
||||
sorted_procs,
|
||||
num_local_procs,
|
||||
OMPI_RML_TAG_SM_BACK_FILE_CREATED,
|
||||
lowest,
|
||||
file_name,
|
||||
&(pending_rml_msgs)))
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!lowest)
|
||||
{
|
||||
/* make certain that things were initialized correctly */
|
||||
if (0 != sm_info.id)
|
||||
{
|
||||
if ((fd = shm_open(sm_info.posix_fname_buff, O_RDWR, 0600)) > 0)
|
||||
{
|
||||
map = create_map(fd,
|
||||
size,
|
||||
size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* if all things were initialized properly, wait until all other local
|
||||
* procs have reported in before calling shm_unlink
|
||||
*/
|
||||
else
|
||||
{
|
||||
if (1 == sm_info.id)
|
||||
{
|
||||
n_local_procs = (int)num_local_procs;
|
||||
while (n_local_procs > map->super.module_seg->seg_att);
|
||||
{
|
||||
opal_atomic_rmb();
|
||||
}
|
||||
/* all other local procs reported in, so it's safe to shm_unlink */
|
||||
shm_unlink(sm_info.posix_fname_buff);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
opal_mutex_unlock(&mutex);
|
||||
|
||||
return &(map->super);
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
int
|
||||
mca_common_sm_posix_fini(mca_common_sm_module_t *mca_common_sm_module)
|
||||
{
|
||||
/* no need for shm_unlink here because it was already taken care of */
|
||||
mca_common_sm_module_posix_t *posix_module =
|
||||
(mca_common_sm_module_posix_t *)mca_common_sm_module;
|
||||
int rc = OMPI_SUCCESS;
|
||||
|
||||
if (NULL != posix_module->super.module_seg)
|
||||
{
|
||||
rc = munmap((void *)posix_module->super.module_seg_addr,
|
||||
posix_module->super.module_size);
|
||||
posix_module->super.module_seg_addr = NULL;
|
||||
posix_module->super.module_size = 0;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* allocate memory from a previously allocated shared memory
|
||||
* block.
|
||||
*
|
||||
* @param size size of request, in bytes (IN)
|
||||
*
|
||||
* @retval addr virtual address
|
||||
*/
|
||||
|
||||
void *
|
||||
mca_common_sm_posix_seg_alloc(struct mca_mpool_base_module_t* mpool,
|
||||
size_t* size,
|
||||
mca_mpool_base_registration_t** registration)
|
||||
{
|
||||
mca_mpool_sm_module_t *sm_module = (mca_mpool_sm_module_t *) mpool;
|
||||
mca_common_sm_module_posix_t *map =
|
||||
(mca_common_sm_module_posix_t *)sm_module->sm_common_module;
|
||||
mca_common_sm_seg_header_t *seg = map->super.module_seg;
|
||||
void *addr;
|
||||
|
||||
opal_atomic_lock(&seg->seg_lock);
|
||||
if(seg->seg_offset + *size > seg->seg_size)
|
||||
{
|
||||
addr = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t fixup;
|
||||
|
||||
/* add base address to segment offset */
|
||||
addr = map->super.module_data_addr + seg->seg_offset;
|
||||
seg->seg_offset += *size;
|
||||
|
||||
/**
|
||||
* fix up seg_offset so next allocation is aligned on a
|
||||
* sizeof(long) boundry. Do it here so that we don't have to
|
||||
* check before checking remaining size in buffer
|
||||
*/
|
||||
if ((fixup = (seg->seg_offset & (sizeof(long) - 1))) > 0)
|
||||
{
|
||||
seg->seg_offset += sizeof(long) - fixup;
|
||||
}
|
||||
}
|
||||
if (NULL != registration)
|
||||
{
|
||||
*registration = NULL;
|
||||
}
|
||||
opal_atomic_unlock(&seg->seg_lock);
|
||||
return addr;
|
||||
}
|
||||
|
113
ompi/mca/common/sm/common_sm_posix.h
Обычный файл
113
ompi/mca/common/sm/common_sm_posix.h
Обычный файл
@ -0,0 +1,113 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _COMMON_SM_POSIX_H_
|
||||
#define _COMMON_SM_POSIX_H_
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/common/sm/common_sm.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
struct mca_mpool_base_module_t;
|
||||
|
||||
typedef struct mca_common_sm_module_posix_t
|
||||
{
|
||||
mca_common_sm_module_t super;
|
||||
} mca_common_sm_module_posix_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_common_sm_module_posix_t);
|
||||
|
||||
/**
|
||||
* This routine is used to set up a POSIX shared memory object.
|
||||
*
|
||||
* @param sorted_procs - array of (ompi_proc_t *)'s to create this shared memory
|
||||
* segment for. this routine, unlike the top-level
|
||||
* mca_common_sm_init routine, assumes that sorted_procs
|
||||
* is in the following state: all the local procs at the
|
||||
* beginning; sorted_procs[0] is the lowest named process.
|
||||
* (IN)
|
||||
*
|
||||
* @param num_local_procs - number of local procs contained within
|
||||
* sorted_procs (IN)
|
||||
*
|
||||
* @param size - size of the shared memory segment, in bytes (IN)
|
||||
*
|
||||
* @param file_name - strictly used for RML message identification/queueing (IN)
|
||||
*
|
||||
* @param size_ctl_structure size of the control structure at
|
||||
* the head of the file. The control structure
|
||||
* is assumed to have mca_common_sm_seg_header_t
|
||||
* as its first segment (IN)
|
||||
*
|
||||
* @param data_set_alignment alignment of the data segment. this
|
||||
* follows the control structure. If this
|
||||
* value if 0, then assume that there will
|
||||
* be no data segment following the control
|
||||
* structure. (IN)
|
||||
*
|
||||
* @return value pointer to control structure at head of file.
|
||||
*/
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_posix_init(ompi_proc_t **sorted_procs,
|
||||
size_t num_local_procs,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/**
|
||||
* Callback from the sm mpool
|
||||
*/
|
||||
OMPI_DECLSPEC extern void *
|
||||
mca_common_sm_posix_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t *size,
|
||||
mca_mpool_base_registration_t **registration);
|
||||
|
||||
/**
|
||||
* This function will release all local resources attached to the
|
||||
* shared memory segment.
|
||||
*
|
||||
* @param mca_common_sm_module - the control structure at head of the segment.
|
||||
*
|
||||
* @returnvalue 0 if everything was OK, otherwise a negative value.
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_posix_fini(mca_common_sm_module_t *mca_common_sm_module);
|
||||
|
||||
/**
|
||||
* component query routine
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_posix_component_query(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* _COMMON_SM_POSIX_H_ */
|
||||
|
184
ompi/mca/common/sm/common_sm_rml.c
Обычный файл
184
ompi/mca/common/sm/common_sm_rml.c
Обычный файл
@ -0,0 +1,184 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
#include "ompi/mca/common/sm/common_sm_rml.h"
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_common_sm_rml_pending_rml_msg_types_t,
|
||||
opal_object_t,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* this routine assumes that sorted_procs is in the following state:
|
||||
* o all the local procs at the beginning.
|
||||
* o sorted_procs[0] is the lowest named process.
|
||||
*/
|
||||
int
|
||||
mca_common_sm_rml_info_bcast(mca_common_sm_rml_sm_info_t *sm_info,
|
||||
ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
int tag,
|
||||
bool bcast_root,
|
||||
const char *file_name,
|
||||
opal_list_t *pending_rml_msgs)
|
||||
{
|
||||
int rc = OMPI_SUCCESS;
|
||||
struct iovec iov[MCA_COMMON_SM_RML_MSG_LEN];
|
||||
int iovrc;
|
||||
size_t p;
|
||||
char filename_to_send[OPAL_PATH_MAX];
|
||||
|
||||
strncpy(filename_to_send, file_name, sizeof(filename_to_send) - 1);
|
||||
|
||||
/* let the first item be the queueing id name */
|
||||
iov[0].iov_base = (ompi_iov_base_ptr_t)filename_to_send;
|
||||
iov[0].iov_len = sizeof(filename_to_send);
|
||||
iov[1].iov_base = (ompi_iov_base_ptr_t)sm_info;
|
||||
iov[1].iov_len = sizeof(mca_common_sm_rml_sm_info_t);
|
||||
|
||||
/**
|
||||
* figure out if i am the root proc in the group.
|
||||
* if i am, bcast the message the rest of the local procs.
|
||||
*/
|
||||
if (bcast_root)
|
||||
{
|
||||
opal_progress_event_users_increment();
|
||||
/* first num_procs items should be local procs */
|
||||
for (p = 1; p < num_procs; ++p)
|
||||
{
|
||||
iovrc = orte_rml.send(&(procs[p]->proc_name),
|
||||
iov,
|
||||
MCA_COMMON_SM_RML_MSG_LEN,
|
||||
tag,
|
||||
0);
|
||||
if ((ssize_t)(iov[0].iov_len +
|
||||
iov[1].iov_len) > iovrc)
|
||||
{
|
||||
ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
|
||||
opal_progress_event_users_decrement();
|
||||
rc = OMPI_ERROR;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
opal_progress_event_users_decrement();
|
||||
}
|
||||
else /* i am NOT the root ("lowest") proc */
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
mca_common_sm_rml_pending_rml_msg_types_t *rml_msg;
|
||||
/**
|
||||
* because a component query can be performed simultaneously in multiple
|
||||
* threads, the RML messages may arrive in any order. so first check to
|
||||
* see if we previously received a message for me.
|
||||
*/
|
||||
for (item = opal_list_get_first(pending_rml_msgs);
|
||||
opal_list_get_end(pending_rml_msgs) != item;
|
||||
item = opal_list_get_next(item))
|
||||
{
|
||||
rml_msg = (mca_common_sm_rml_pending_rml_msg_types_t *)item;
|
||||
/* was the message for me? */
|
||||
if (0 == strcmp(rml_msg->rml_file_name, file_name))
|
||||
{
|
||||
opal_list_remove_item(pending_rml_msgs, item);
|
||||
strncpy(sm_info->posix_fname_buff,
|
||||
rml_msg->posix_fname_buff,
|
||||
OMPI_COMMON_SM_POSIX_FILE_LEN_MAX);
|
||||
sm_info->id = rml_msg->id;
|
||||
OBJ_RELEASE(item);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* if we didn't find a message already waiting, block on
|
||||
* receiving from the RML.
|
||||
*/
|
||||
if (opal_list_get_end(pending_rml_msgs) == item)
|
||||
{
|
||||
do
|
||||
{
|
||||
/**
|
||||
* bump up the libevent polling frequency while we're
|
||||
* in this RML recv, just to ensure we're checking
|
||||
* libevent frequently.
|
||||
*/
|
||||
opal_progress_event_users_increment();
|
||||
iovrc = orte_rml.recv(&(procs[0]->proc_name),
|
||||
iov,
|
||||
MCA_COMMON_SM_RML_MSG_LEN,
|
||||
tag,
|
||||
0);
|
||||
opal_progress_event_users_decrement();
|
||||
if (iovrc < 0)
|
||||
{
|
||||
ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
|
||||
rc = OMPI_ERROR;
|
||||
goto out;
|
||||
}
|
||||
/* was the message for me? if so, we're done */
|
||||
if (0 == strcmp(filename_to_send, file_name))
|
||||
{
|
||||
break;
|
||||
}
|
||||
/* if not, put it on the pending list and try again */
|
||||
if (NULL == (rml_msg =
|
||||
OBJ_NEW(mca_common_sm_rml_pending_rml_msg_types_t)))
|
||||
{
|
||||
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
rc = OMPI_ERROR;
|
||||
goto out;
|
||||
}
|
||||
/* safe because sizeof(rml_msg->file_name) ==
|
||||
* sizeof(filename_to_send), same same goes for
|
||||
* rml_msg->posix_fname_buff and sm_info->posix_fname_buff */
|
||||
memcpy(rml_msg->rml_file_name,
|
||||
filename_to_send,
|
||||
OPAL_PATH_MAX);
|
||||
memcpy(rml_msg->posix_fname_buff,
|
||||
sm_info->posix_fname_buff,
|
||||
OMPI_COMMON_SM_POSIX_FILE_LEN_MAX);
|
||||
rml_msg->id = sm_info->id;
|
||||
opal_list_append(pending_rml_msgs, &(rml_msg->super));
|
||||
} while(1);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
return rc;
|
||||
}
|
||||
|
73
ompi/mca/common/sm/common_sm_rml.h
Обычный файл
73
ompi/mca/common/sm/common_sm_rml.h
Обычный файл
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _COMMON_SM_RML_H_
|
||||
#define _COMMON_SM_RML_H_
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/common/sm/common_sm.h"
|
||||
|
||||
#define MCA_COMMON_SM_RML_MSG_LEN 2
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* shared memory information used for initialization and setup.
|
||||
*/
|
||||
typedef struct mca_common_sm_rml_sm_info_t
|
||||
{
|
||||
char posix_fname_buff[OMPI_COMMON_SM_POSIX_FILE_LEN_MAX];
|
||||
int id;
|
||||
} mca_common_sm_rml_sm_info_t;
|
||||
|
||||
/**
|
||||
* items on the pending_rml_msgs list
|
||||
*/
|
||||
typedef struct mca_common_sm_rml_pending_rml_msg_types_t
|
||||
{
|
||||
opal_list_item_t super;
|
||||
char rml_file_name[OPAL_PATH_MAX];
|
||||
char posix_fname_buff[OMPI_COMMON_SM_POSIX_FILE_LEN_MAX];
|
||||
int id;
|
||||
} mca_common_sm_rml_pending_rml_msg_types_t;
|
||||
|
||||
/**
|
||||
* routine used to broadcast common sm initialization information to all local
|
||||
* processes in procs.
|
||||
*/
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_rml_info_bcast(mca_common_sm_rml_sm_info_t *sm_info,
|
||||
ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
int tag,
|
||||
bool bcast_root,
|
||||
const char *file_name,
|
||||
opal_list_t *pending_rml_msgs);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* _COMMON_SM_RML_H_*/
|
||||
|
@ -59,6 +59,8 @@
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
#include "ompi/mca/mpool/sm/mpool_sm.h"
|
||||
|
||||
#include "common_sm_rml.h"
|
||||
#include "common_sm_sysv.h"
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
@ -74,32 +76,20 @@ OBJ_CLASS_INSTANCE(
|
||||
*/
|
||||
static opal_mutex_t mutex;
|
||||
|
||||
/**
|
||||
* shared memory information used for initialization and setup.
|
||||
*/
|
||||
static mca_common_sm_rml_sm_info_t sm_info;
|
||||
|
||||
/**
|
||||
* list of RML messages that have arrived that have not yet been
|
||||
* consumed by the thread who is looking to attach to the shared
|
||||
* memory segment that the RML message corresponds to.
|
||||
* consumed by the thread who is looking to complete its component
|
||||
* initialization based on the contents of the RML message.
|
||||
*/
|
||||
static opal_list_t pending_rml_msgs;
|
||||
static bool pending_rml_msgs_init = false;
|
||||
|
||||
/**
|
||||
* items on the pending_rml_msgs list
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
opal_list_item_t super;
|
||||
char file_name[OPAL_PATH_MAX];
|
||||
int shmem_seg_inited;
|
||||
int shmid;
|
||||
} pending_sysv_rml_msg_t;
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
pending_sysv_rml_msg_t,
|
||||
opal_list_item_t,
|
||||
NULL,
|
||||
NULL
|
||||
);
|
||||
|
||||
/******************************************************************************/
|
||||
static mca_common_sm_module_sysv_t *
|
||||
create_shmem_seg(int shmid,
|
||||
int is_root,
|
||||
@ -208,10 +198,10 @@ create_shmem_seg(int shmid,
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* mca_common_sm_sysv_component_query
|
||||
* the run-time test
|
||||
* mca_common_sm_sysv_component_query
|
||||
* the sysv run-time test
|
||||
*/
|
||||
int
|
||||
int
|
||||
mca_common_sm_sysv_component_query(void)
|
||||
{
|
||||
char c = 'j';
|
||||
@ -220,7 +210,7 @@ mca_common_sm_sysv_component_query(void)
|
||||
char *a = NULL;
|
||||
char *addr = (char *)-1;
|
||||
struct shmid_ds tmp_buff;
|
||||
|
||||
|
||||
if (-1 == (shmid = shmget(IPC_PRIVATE,
|
||||
(size_t)(getpagesize()),
|
||||
IPC_CREAT | IPC_EXCL | SHM_R | SHM_W)))
|
||||
@ -231,7 +221,7 @@ mca_common_sm_sysv_component_query(void)
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
/* protect against lazy establishment - may not be needed, but can't hurt */
|
||||
a = addr;
|
||||
*a = c;
|
||||
@ -262,78 +252,24 @@ out:
|
||||
* mca_common_sm_sysv_init
|
||||
*/
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_sysv_init(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
mca_common_sm_sysv_init(ompi_proc_t **sorted_procs,
|
||||
size_t num_local_procs,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
mca_common_sm_module_sysv_t *map = NULL;
|
||||
bool found_lowest = false;
|
||||
int shmid = -1;
|
||||
int rc = 0;
|
||||
size_t num_local_procs = 0;
|
||||
bool lowest;
|
||||
size_t mem_offset;
|
||||
size_t p;
|
||||
struct iovec iov[2];
|
||||
char filename_to_send[OPAL_PATH_MAX];
|
||||
opal_list_item_t *item;
|
||||
pending_sysv_rml_msg_t *rml_msg;
|
||||
ompi_proc_t *temp_proc;
|
||||
|
||||
/**
|
||||
* reorder procs array to have all the local procs at the beginning.
|
||||
* simultaneously look for the local proc with the lowest name. ensure
|
||||
* that procs[0] is the lowest named process.
|
||||
*/
|
||||
for (p = 0; p < num_procs; ++p)
|
||||
{
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags))
|
||||
{
|
||||
/* if we don't have a lowest, save the first one */
|
||||
if (!found_lowest)
|
||||
{
|
||||
procs[0] = procs[p];
|
||||
found_lowest = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* save this proc */
|
||||
procs[num_local_procs] = procs[p];
|
||||
/**
|
||||
* if we have a new lowest, swap it with position 0
|
||||
* so that procs[0] is always the lowest named proc
|
||||
*/
|
||||
if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
&(procs[p]->proc_name),
|
||||
&(procs[0]->proc_name)) < 0)
|
||||
{
|
||||
temp_proc = procs[0];
|
||||
procs[0] = procs[p];
|
||||
procs[num_local_procs] = temp_proc;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Regardless of the comparisons above, we found
|
||||
* another proc on the local node, so increment
|
||||
*/
|
||||
++num_local_procs;
|
||||
}
|
||||
}
|
||||
sm_info.id = -1;
|
||||
memset(sm_info.posix_fname_buff, '\0', OMPI_COMMON_SM_POSIX_FILE_LEN_MAX);
|
||||
|
||||
/* if there are no local procs, there's nothing to do */
|
||||
if (0 == num_local_procs)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
strncpy(filename_to_send, file_name, sizeof(filename_to_send) - 1);
|
||||
|
||||
iov[0].iov_base = &shmid;
|
||||
iov[0].iov_len = sizeof(shmid);
|
||||
iov[1].iov_base = filename_to_send;
|
||||
iov[1].iov_len = sizeof(filename_to_send);
|
||||
lowest = (0 == orte_util_compare_name_fields(
|
||||
ORTE_NS_CMP_ALL,
|
||||
ORTE_PROC_MY_NAME,
|
||||
&(sorted_procs[0]->proc_name)));
|
||||
|
||||
/**
|
||||
* lock here to prevent multiple threads from invoking this function
|
||||
@ -352,14 +288,12 @@ mca_common_sm_sysv_init(ompi_proc_t **procs,
|
||||
* figure out if i am the lowest proc in the group (aka "the root").
|
||||
* if i am, initialize the shared memory segment.
|
||||
*/
|
||||
if (0 == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
|
||||
ORTE_PROC_MY_NAME,
|
||||
&(procs[0]->proc_name)))
|
||||
if (lowest)
|
||||
{
|
||||
/* create a new shared memory segment and save the shmid. */
|
||||
if (-1 == (shmid = shmget(IPC_PRIVATE,
|
||||
size,
|
||||
IPC_CREAT | IPC_EXCL | SHM_R | SHM_W)))
|
||||
if (-1 == (sm_info.id = shmget(IPC_PRIVATE,
|
||||
size,
|
||||
IPC_CREAT | IPC_EXCL | SHM_R | SHM_W)))
|
||||
{
|
||||
/**
|
||||
* if we are here, a few of things could have happened:
|
||||
@ -380,9 +314,9 @@ mca_common_sm_sysv_init(ompi_proc_t **procs,
|
||||
err,
|
||||
size);
|
||||
}
|
||||
else /* ftok and shmget were both successful */
|
||||
else
|
||||
{
|
||||
map = create_shmem_seg(shmid,
|
||||
map = create_shmem_seg(sm_info.id,
|
||||
1, /* i am the root */
|
||||
size,
|
||||
size_ctl_structure,
|
||||
@ -396,7 +330,9 @@ mca_common_sm_sysv_init(ompi_proc_t **procs,
|
||||
map->super.module_seg->seg_offset = mem_offset;
|
||||
map->super.module_seg->seg_size = size - mem_offset;
|
||||
map->super.module_seg->seg_inited = 0;
|
||||
#if 0 /* i don't think this unlock is needed, but it's in mmap's source */
|
||||
opal_atomic_unlock(&map->super.module_seg->seg_lock);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -404,130 +340,42 @@ mca_common_sm_sysv_init(ompi_proc_t **procs,
|
||||
* best effort to delete the segment.
|
||||
* may not be needed, but can't hurt.
|
||||
*/
|
||||
shmctl(shmid, IPC_RMID, NULL);
|
||||
shmctl(sm_info.id, IPC_RMID, NULL);
|
||||
/**
|
||||
* setting shmid to -1 here will tell
|
||||
* the other procs that we failed.
|
||||
*/
|
||||
shmid = -1;
|
||||
sm_info.id = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* signal the rest of the local procs that a new shared memory segment
|
||||
* has successfully been created and is ready to be attached to. bump
|
||||
* up the libevent polling frequency while we're using the RML.
|
||||
*/
|
||||
opal_progress_event_users_increment();
|
||||
for (p = 1; p < num_local_procs; ++p)
|
||||
{
|
||||
rc = orte_rml.send(&(procs[p]->proc_name),
|
||||
iov,
|
||||
2,
|
||||
OMPI_RML_TAG_SM_BACK_FILE_CREATED,
|
||||
0);
|
||||
if (rc < (ssize_t)(iov[0].iov_len +
|
||||
iov[1].iov_len))
|
||||
{
|
||||
ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
|
||||
opal_progress_event_users_decrement();
|
||||
|
||||
/* free it all -- bad things are going to happen */
|
||||
if (NULL != map)
|
||||
{
|
||||
shmdt(map->super.module_seg_addr);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
opal_progress_event_users_decrement();
|
||||
}
|
||||
else /* i am NOT the lowest local rank */
|
||||
|
||||
/**
|
||||
* signal the rest of the local procs that a
|
||||
* new shared memory object has been created.
|
||||
*/
|
||||
if (OMPI_SUCCESS != mca_common_sm_rml_info_bcast(
|
||||
&sm_info,
|
||||
sorted_procs,
|
||||
num_local_procs,
|
||||
OMPI_RML_TAG_SM_BACK_FILE_CREATED,
|
||||
lowest,
|
||||
file_name,
|
||||
&(pending_rml_msgs)))
|
||||
{
|
||||
/**
|
||||
* all other procs will wait for the shared memory segment to be
|
||||
* initialized before attaching to it. because the shared memory
|
||||
* segment may be initialized simultaneously in multiple threads,
|
||||
* the RML messages may arrive in any order. so, first check to
|
||||
* see if we previously received a message for me.
|
||||
*/
|
||||
for (item = opal_list_get_first(&pending_rml_msgs);
|
||||
opal_list_get_end(&pending_rml_msgs) != item;
|
||||
item = opal_list_get_next(item))
|
||||
{
|
||||
rml_msg = (pending_sysv_rml_msg_t *)item;
|
||||
/* was the message for me? */
|
||||
if (0 == strcmp(rml_msg->file_name, file_name))
|
||||
{
|
||||
opal_list_remove_item(&pending_rml_msgs, item);
|
||||
/* set the shmid so i know what shared mem seg to attach to */
|
||||
shmid = rml_msg->shmid;
|
||||
OBJ_RELEASE(item);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* if we didn't find a message already waiting, block on
|
||||
* receiving from the RML.
|
||||
*/
|
||||
if (opal_list_get_end(&pending_rml_msgs) == item)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
/**
|
||||
* bump up the libevent polling frequency while we're
|
||||
* in this RML recv, just to ensure we're checking
|
||||
* libevent more frequently.
|
||||
*/
|
||||
opal_progress_event_users_increment();
|
||||
rc = orte_rml.recv(&(procs[0]->proc_name),
|
||||
iov,
|
||||
2,
|
||||
OMPI_RML_TAG_SM_BACK_FILE_CREATED,
|
||||
0);
|
||||
opal_progress_event_users_decrement();
|
||||
if (rc < 0)
|
||||
{
|
||||
ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* was the message for me? if so, we're done */
|
||||
if (0 == strcmp(filename_to_send, file_name))
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
/* if not, put it on the pending list and try again */
|
||||
rml_msg = OBJ_NEW(pending_sysv_rml_msg_t);
|
||||
if (NULL == rml_msg)
|
||||
{
|
||||
ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
|
||||
goto out;
|
||||
}
|
||||
memcpy(rml_msg->file_name,
|
||||
filename_to_send,
|
||||
sizeof(rml_msg->file_name));
|
||||
rml_msg->shmid = shmid;
|
||||
opal_list_append(&pending_rml_msgs, &(rml_msg->super));
|
||||
} /* end while 1 */
|
||||
}
|
||||
|
||||
/* did the root setup the shmid correctly? if so, attach to it */
|
||||
if (-1 != shmid)
|
||||
{
|
||||
map = create_shmem_seg(shmid,
|
||||
0, /* i am NOT the root */
|
||||
size,
|
||||
size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
if (NULL == map)
|
||||
{
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
} /* end else - i am NOT the lowest local rank */
|
||||
goto out;
|
||||
}
|
||||
/* did the root setup the shmid correctly? if so, attach to it */
|
||||
if (!lowest && -1 != sm_info.id)
|
||||
{
|
||||
/* no return value check here because the error
|
||||
* path is the same as the expected path */
|
||||
map = create_shmem_seg(sm_info.id,
|
||||
0, /* i am NOT the root */
|
||||
size,
|
||||
size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
}
|
||||
|
||||
out:
|
||||
opal_mutex_unlock(&mutex);
|
||||
@ -535,56 +383,6 @@ out:
|
||||
return &(map->super);
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* same as mca_common_sm_sysv_init(), but takes an (ompi_group_t *)
|
||||
* argument instead of an array of ompi_proc_t's.
|
||||
*
|
||||
* this function just checks the group to ensure that all the procs
|
||||
* are local, and if they are, calls mca_common_sm_sysv_init().
|
||||
*/
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_sysv_init_group(ompi_group_t *group,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
size_t i;
|
||||
size_t group_size;
|
||||
ompi_proc_t *proc;
|
||||
ompi_proc_t **procs;
|
||||
mca_common_sm_module_t *ret;
|
||||
|
||||
group_size = ompi_group_size(group);
|
||||
procs = (ompi_proc_t **) malloc(sizeof(ompi_proc_t *) * group_size);
|
||||
|
||||
if (NULL == procs)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < group_size; ++i)
|
||||
{
|
||||
proc = ompi_group_peer_lookup(group,i);
|
||||
if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags))
|
||||
{
|
||||
free(procs);
|
||||
return NULL;
|
||||
}
|
||||
procs[i] = proc;
|
||||
}
|
||||
|
||||
ret = mca_common_sm_sysv_init(procs,
|
||||
group_size,
|
||||
size,
|
||||
file_name,
|
||||
size_ctl_structure,
|
||||
data_seg_alignment);
|
||||
free(procs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/******************************************************************************/
|
||||
/**
|
||||
* sys v module finalization routine.
|
||||
@ -634,7 +432,7 @@ mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t* mpool,
|
||||
|
||||
opal_atomic_lock(&seg->seg_lock);
|
||||
|
||||
if(seg->seg_offset + *size > seg->seg_size)
|
||||
if (seg->seg_offset + *size > seg->seg_size)
|
||||
{
|
||||
addr = NULL;
|
||||
}
|
||||
|
@ -5,17 +5,17 @@
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
@ -27,7 +27,7 @@
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/group/group.h"
|
||||
#include "ompi/mca/common/sm/common_sm.h"
|
||||
@ -36,7 +36,7 @@ BEGIN_C_DECLS
|
||||
|
||||
struct mca_mpool_base_module_t;
|
||||
|
||||
typedef struct mca_common_sm_module_sysv_t
|
||||
typedef struct mca_common_sm_module_sysv_t
|
||||
{
|
||||
mca_common_sm_module_t super;
|
||||
} mca_common_sm_module_sysv_t;
|
||||
@ -45,69 +45,48 @@ OBJ_CLASS_DECLARATION(mca_common_sm_module_sysv_t);
|
||||
|
||||
/**
|
||||
* This routine is used to set up a System V shared memory segment.
|
||||
* It is assumed that NO shared memory segment already exists with
|
||||
* key = ftok(file_name, 0) when the "creator proccess" tries to
|
||||
* shmget(key, size, ...).
|
||||
*
|
||||
* @param procs - array of (ompi_proc_t*)'s to create this shared
|
||||
* memory segment for. This array must be writable; it may be edited
|
||||
* (in undefined ways) if the array contains procs that are not on
|
||||
* this host. It is assumed that the caller will simply free this
|
||||
* array upon return. (INOUT)
|
||||
* @param sorted_procs - array of (ompi_proc_t *)'s to create this shared memory
|
||||
* segment for. this routine, unlike the top-level
|
||||
* mca_common_sm_init routine, assumes that sorted_procs
|
||||
* is in the following state: all the local procs at the
|
||||
* beginning; sorted_procs[0] is the lowest named process.
|
||||
* (IN)
|
||||
*
|
||||
* @param num_procs - length of the procs array (IN)
|
||||
* @param num_local_procs - number of local procs contained within
|
||||
* sorted_procs (IN)
|
||||
*
|
||||
* @param size - size of the shared memory segment, in bytes (IN)
|
||||
*
|
||||
* @param file_name name of file to be opened that is
|
||||
* used for shmget key generation. (IN)
|
||||
* @param file_name - strictly used for RML message identification/queueing (IN)
|
||||
*
|
||||
* @param size_ctl_structure size of the control structure at
|
||||
* the head of the file. The control structure
|
||||
* the head of the file. the control structure
|
||||
* is assumed to have mca_common_sm_seg_header_t
|
||||
* as its first segment (IN)
|
||||
*
|
||||
* @param data_set_alignment alignment of the data segment. this
|
||||
* follows the control structure. If this
|
||||
* value if 0, then assume that there will
|
||||
* be no data segment following the control
|
||||
* @param data_set_alignment alignment of the data segment. This
|
||||
* follows the control structure. If this
|
||||
* value if 0, then assume that there will
|
||||
* be no data segment following the control
|
||||
* structure. (IN)
|
||||
*
|
||||
* @return value pointer to control structure at head of file.
|
||||
*/
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_sysv_init(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
size_t size,
|
||||
mca_common_sm_sysv_init(ompi_proc_t **sorted_procs,
|
||||
size_t num_local_procs,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/**
|
||||
* This routine is used to set up a System V shared memory segment.
|
||||
* It is assumed that NO shared memory segment already exists with
|
||||
* key = ftok(file_name, 0) when the "creator (root) proccess" tries to
|
||||
* shmget(key, size, ...).
|
||||
*
|
||||
* This routine is the same as mca_common_sm_sysv_init() except that
|
||||
* it takes an (ompi_group_t*) parameter to specify the peers rather
|
||||
* than an array of procs. Unlike mca_common_sm_sysv_init(), the
|
||||
* group must contain *only* local peers, or this function will return
|
||||
* NULL and not create any shared memory segment.
|
||||
*/
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_sysv_init_group(ompi_group_t *group,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/**
|
||||
* Callback from the sm mpool
|
||||
*/
|
||||
OMPI_DECLSPEC extern void *
|
||||
mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t *size,
|
||||
mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t *size,
|
||||
mca_mpool_base_registration_t **registration);
|
||||
|
||||
/**
|
||||
@ -115,13 +94,13 @@ mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
* shared memory segment. We assume that the operating system will destroy the
|
||||
* shared memory segment when the last process detaches from it.
|
||||
*
|
||||
* It is assumed that the operating system's System V IPC implementation
|
||||
* It is assumed that the operating system's System V IPC implementation
|
||||
* supports the following IPC_RMID semantics.
|
||||
*
|
||||
* Calling shmctl(shmid, IPC_RMID, ...) will actually destroy the shared memory
|
||||
* segment *after* the last process detaches from it (i.e., when the shm_nattch
|
||||
* member of the associated structure shmid_ds is zero). This behavior is
|
||||
* important because we rely on it to release all allocated shared memory
|
||||
* important because we rely on it to release all allocated shared memory
|
||||
* segments upon job termination - including abnormal job termination.
|
||||
*
|
||||
* @param mca_common_sm_module - the control structure at head of the segment.
|
||||
@ -129,14 +108,14 @@ mca_common_sm_sysv_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
* @returnvalue 0 if everything was OK, otherwise a negative value.
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC extern int
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_sysv_fini(mca_common_sm_module_t *mca_common_sm_module);
|
||||
|
||||
/**
|
||||
* component query routine
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC extern int
|
||||
OMPI_DECLSPEC extern int
|
||||
mca_common_sm_sysv_component_query(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
@ -79,8 +79,8 @@ mca_common_sm_windows_component_query(void)
|
||||
}
|
||||
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_windows_init(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
mca_common_sm_windows_init(ompi_proc_t **sorted_procs,
|
||||
size_t num_local_procs,
|
||||
size_t size, char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
@ -197,44 +197,6 @@ mca_common_sm_windows_init(ompi_proc_t **procs,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Same as mca_common_sm_windows_init(), but takes an (ompi_group_t*)
|
||||
* argument instead of na array of ompi_proc_t's.
|
||||
*
|
||||
* This function just checks the group to ensure that all the procs
|
||||
* are local, and if they are, calls mca_common_sm_windows_init().
|
||||
*/
|
||||
mca_common_sm_module_t *
|
||||
mca_common_sm_windows_init_group(ompi_group_t *group,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment)
|
||||
{
|
||||
size_t i, group_size;
|
||||
ompi_proc_t *proc, **procs;
|
||||
mca_common_sm_module_t *ret;
|
||||
|
||||
group_size = ompi_group_size(group);
|
||||
procs = (ompi_proc_t**) malloc(sizeof(ompi_proc_t*) * group_size);
|
||||
if (NULL == procs) {
|
||||
return NULL;
|
||||
}
|
||||
for (i = 0; i < group_size; ++i) {
|
||||
proc = ompi_group_peer_lookup(group,i);
|
||||
if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
|
||||
free(procs);
|
||||
return NULL;
|
||||
}
|
||||
procs[i] = proc;
|
||||
}
|
||||
|
||||
ret = mca_common_sm_windows_init(procs, group_size, size, file_name,
|
||||
size_ctl_structure, data_seg_alignment);
|
||||
free(procs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
mca_common_sm_windows_fini(mca_common_sm_module_t *mca_common_sm_module)
|
||||
{
|
||||
|
@ -49,13 +49,15 @@ OBJ_CLASS_DECLARATION(mca_common_sm_module_windows_t);
|
||||
* exist before any of the current set of processes try and open
|
||||
* it.
|
||||
*
|
||||
* @param procs - array of (ompi_proc_t*)'s to create this shared
|
||||
* memory segment for. This array must be writable; it may be edited
|
||||
* (in undefined ways) if the array contains procs that are not on
|
||||
* this host. It is assumed that the caller will simply free this
|
||||
* array upon return. (INOUT)
|
||||
* @param sorted_procs - array of (ompi_proc_t *)'s to create this shared memory
|
||||
* segment for. this routine, unlike the top-level
|
||||
* mca_common_sm_init routine, assumes that sorted_procs
|
||||
* is in the following state: all the local procs at the
|
||||
* beginning; sorted_procs[0] is the lowest named process.
|
||||
* (IN)
|
||||
*
|
||||
* @param num_procs - length of the procs array (IN)
|
||||
* @param num_local_procs - number of local procs contained within
|
||||
* sorted_procs (IN)
|
||||
*
|
||||
* @param size - size of the file, in bytes (IN)
|
||||
*
|
||||
@ -75,39 +77,20 @@ OBJ_CLASS_DECLARATION(mca_common_sm_module_windows_t);
|
||||
* @return value pointer to control structure at head of file.
|
||||
*/
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_windows_init(ompi_proc_t **procs,
|
||||
size_t num_procs,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/**
|
||||
* This routine is used to set up a shared memory file, backed
|
||||
* by a specified file. It is assumed that the file does not
|
||||
* exist before any of the current set of processes try and open
|
||||
* it.
|
||||
*
|
||||
* This routine is the same as mca_common_sm_windows_init() except that
|
||||
* it takes an (ompi_group_t*) parameter to specify the peers rather
|
||||
* than an array of procs. Unlike mca_common_sm_windows_init(), the
|
||||
* group must contain *only* local peers, or this function will return
|
||||
* NULL and not create any shared memory segment.
|
||||
*/
|
||||
OMPI_DECLSPEC extern mca_common_sm_module_t *
|
||||
mca_common_sm_windows_init_group(ompi_group_t *group,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
mca_common_sm_windows_init(ompi_proc_t **sorted_procs,
|
||||
size_t num_local_procs,
|
||||
size_t size,
|
||||
char *file_name,
|
||||
size_t size_ctl_structure,
|
||||
size_t data_seg_alignment);
|
||||
|
||||
/*
|
||||
* Callback from the sm mpool
|
||||
*/
|
||||
OMPI_DECLSPEC extern void *
|
||||
mca_common_sm_windows_seg_alloc(struct mca_mpool_base_module_t *mpool,
|
||||
size_t *size,
|
||||
mca_mpool_base_registration_t **registration);
|
||||
size_t *size,
|
||||
mca_mpool_base_registration_t **registration);
|
||||
|
||||
/**
|
||||
* This function will release all local resources attached to the
|
||||
|
@ -10,7 +10,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2010 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
@ -23,51 +23,64 @@
|
||||
# MCA_common_sm_POST_CONFIG([should_build])
|
||||
# ------------------------------------------
|
||||
AC_DEFUN([MCA_common_sm_POST_CONFIG], [
|
||||
AM_CONDITIONAL([MCA_common_sm_windows],
|
||||
[test $1 -eq 1 -a "x$MCA_common_sm_windows" = "x1"])
|
||||
AM_CONDITIONAL([MCA_common_sm_sysv],
|
||||
[test $1 -eq 1 -a "x$MCA_common_sm_sysv" = "x1"])
|
||||
AM_CONDITIONAL([COMMON_SM_BUILD_WINDOWS],
|
||||
[test $1 -eq 1 -a "x$common_sm_build_windows" = "x1"])
|
||||
AM_CONDITIONAL([COMMON_SM_BUILD_SYSV],
|
||||
[test $1 -eq 1 -a "x$common_sm_build_sysv" = "x1"])
|
||||
AM_CONDITIONAL([COMMON_SM_BUILD_POSIX],
|
||||
[test $1 -eq 1 -a "x$common_sm_build_posix" = "x1"])
|
||||
])dnl
|
||||
|
||||
# MCA_common_sm_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
AC_DEFUN([MCA_common_sm_CONFIG], [
|
||||
OMPI_VAR_SCOPE_PUSH([MCA_common_sm_windows MCA_common_sm_sysv])
|
||||
|
||||
# Are we building on Windows?
|
||||
AC_CHECK_FUNC(CreateFileMapping,
|
||||
[MCA_common_sm_windows=1],
|
||||
[MCA_common_sm_windows=0])
|
||||
AC_DEFINE_UNQUOTED([MCA_COMMON_SM_WINDOWS],
|
||||
[$MCA_common_sm_windows],
|
||||
[common_sm_build_windows=1],
|
||||
[common_sm_build_windows=0])
|
||||
AC_DEFINE_UNQUOTED([MCA_COMMON_SM_WINDOWS],
|
||||
[$common_sm_build_windows],
|
||||
[Whether we have shared memory support for Windows or not])
|
||||
|
||||
# do we have sysv shared memory support on this system?
|
||||
AC_CHECK_FUNC(shmget,
|
||||
[ompi_check_sysv_happy="yes"],
|
||||
[ompi_check_sysv_happy="no"])
|
||||
|
||||
# do we want to enable System V shared memory support?
|
||||
AC_MSG_CHECKING([if want sysv support])
|
||||
AC_MSG_CHECKING([if want sysv shared memory support])
|
||||
AC_ARG_ENABLE(sysv,
|
||||
AC_HELP_STRING([--enable-sysv],
|
||||
[enable sysv shared memory support (default: disabled)]))
|
||||
if test "$enable_sysv" = "yes"; then
|
||||
if test "$ompi_check_sysv_happy" = "yes"; then
|
||||
AC_MSG_RESULT([yes])
|
||||
MCA_common_sm_sysv=1
|
||||
else
|
||||
MCA_common_sm_sysv=0
|
||||
AC_MSG_ERROR([sysv support requested but not found. aborting])
|
||||
fi
|
||||
else
|
||||
AC_MSG_RESULT([no])
|
||||
MCA_common_sm_sysv=0
|
||||
fi
|
||||
AC_HELP_STRING([--disable-sysv],
|
||||
[disable sysv shared memory support (default: enabled)]))
|
||||
AS_IF([test "$enable_sysv" = "no"],
|
||||
[AC_MSG_RESULT([no])
|
||||
common_sm_build_sysv=0],
|
||||
[AC_MSG_RESULT([yes])
|
||||
AC_CHECK_FUNC(shmget,
|
||||
[common_sm_build_sysv=1],
|
||||
[common_sm_build_sysv=0])])
|
||||
AS_IF([test "$enable_sysv" = "yes" -a "$common_sm_build_sysv" = "0"],
|
||||
[AC_MSG_WARN([System V shared memory support requested but not found])
|
||||
AC_MSG_ERROR([Cannot continue])])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MCA_COMMON_SM_SYSV],
|
||||
[$MCA_common_sm_sysv],
|
||||
[$common_sm_build_sysv],
|
||||
[Whether we have shared memory support for SYSV or not])
|
||||
|
||||
# do we have the posix shm stuff?
|
||||
AC_MSG_CHECKING([if want POSIX shared memory support])
|
||||
AC_ARG_ENABLE(posix-shmem,
|
||||
AC_HELP_STRING([--disable-posix-shmem],
|
||||
[disable posix shared memory support (default: enabled)]))
|
||||
AS_IF([test "$enable_posix_shmem" = "no"],
|
||||
[AC_MSG_RESULT([no])
|
||||
common_sm_build_posix=0],
|
||||
[AC_MSG_RESULT([yes])
|
||||
AC_SEARCH_LIBS([shm_open], [rt],
|
||||
[common_sm_build_posix=1],
|
||||
[common_sm_build_posix=0])])
|
||||
AS_IF([test "$enable_posix_shmem" = "yes" -a "$common_sm_build_posix" = "0"],
|
||||
[AC_MSG_WARN([POSIX shared memory support requested but not found])
|
||||
AC_MSG_ERROR([Cannot continue])])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MCA_COMMON_SM_POSIX],
|
||||
[$common_sm_build_posix],
|
||||
[Whether we have shared memory support for POSIX or not])
|
||||
])dnl
|
||||
|
||||
|
@ -69,6 +69,6 @@ the MCA parameter "orte_no_session_dir".
|
||||
WARNING: "%s" not recognized - ignoring option. Suppressing additional
|
||||
unrecognized option warnings.
|
||||
#
|
||||
[sysv rt test fail]
|
||||
[sm rt test fail]
|
||||
WARNING: It appears as if your system does not provide the run-time behavior
|
||||
that we rely on to safely provide System V shared memory support.
|
||||
that we rely on to safely provide %s shared memory support.
|
||||
|
@ -53,6 +53,8 @@ BEGIN_C_DECLS
|
||||
|
||||
/* support for shared memory collectives */
|
||||
#define OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED OMPI_RML_TAG_BASE+9
|
||||
/* common sm component query result index */
|
||||
#define OMPI_RML_TAG_COMMON_SM_COMP_INDEX OMPI_RML_TAG_BASE+10
|
||||
|
||||
#define OMPI_RML_TAG_DYNAMIC OMPI_RML_TAG_BASE+200
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user