/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
 * Copyright (c) 2008-2010 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2010      Los Alamos National Security, LLC.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "opal/util/argv.h"
#if OPAL_ENABLE_FT_CR == 1
#include "opal/runtime/opal_cr.h"
#endif

#include "orte/mca/rml/rml.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/errmgr/errmgr.h"

#include "ompi/constants.h"
#include "ompi/mca/dpm/dpm.h"

#include "common_sm_rml.h"
#include "common_sm_mmap.h"
#if MCA_COMMON_SM_SYSV
#include "common_sm_sysv.h"
#endif /* MCA_COMMON_SM_SYSV */
#if MCA_COMMON_SM_WINDOWS
#include "common_sm_windows.h"
#endif /* MCA_COMMON_SM_WINDOWS */
#if MCA_COMMON_SM_POSIX
#include "common_sm_posix.h"
#endif /* MCA_COMMON_SM_POSIX */

/**
 * ASSUMING local proc homogeneity with respect to all utilized shared memory
 * facilities. that is, if one local proc deems a particular shared memory
 * facility acceptable, then ALL local procs should be able to utilize that
 * facility. as it stands, this is an important point because one process
 * dictates to all other local procs which common sm component will be selected
 * based on its own, local run-time test.
*/

/* set to true by mca_common_sm_init() once it has run component selection */
static bool initialized = false;
/* reference count maintained by mca_common_sm_param_register() and
 * mca_common_sm_param_unregister() */
static int num_times_registered = 0;
/* MCA parameter indices returned by the mca_base_param_reg_*() calls in
 * mca_common_sm_param_register().  sysv_index == -1 additionally serves as
 * the "one-time setup has not run yet" flag in that routine. */
static int sysv_index = -1;
static int posix_index = -1;
static int common_sm_index = -1;
/* NULL-terminated list of requested component names, produced by
 * opal_argv_split() and released in mca_common_sm_param_unregister() */
static char **sm_argv = NULL;
/* raw value of the mpi_common_sm parameter; only live while
 * mca_common_sm_param_register() is parsing it */
static char *sm_params = NULL;
/* entry points of the selected component; assigned by
 * select_common_sm_component() and NULL while nothing is selected */
static mca_common_sm_init_fn_t sm_init = NULL;
static mca_common_sm_seg_alloc_fn_t sm_seg_alloc = NULL;
static mca_common_sm_fini_fn_t sm_fini = NULL;
/* should be more than enough to store all common sm component names */
static char sm_default[32];
/* holds common sm help string */
char sm_avail_help_str[OPAL_PATH_MAX];

/**
 * lock to protect multiple instances of query_sm_components()
 * from being invoked simultaneously (because of rml usage).
 *
 * NOTE(review): no OBJ_CONSTRUCT of this mutex is visible in this file --
 * confirm that it is initialized before mca_common_sm_init() locks it.
 */
static opal_mutex_t mutex;

/* common shared memory component information */
typedef struct {
    /* flag indicating whether or not the component is available */
    bool avail;
    /* component name */
    char *sm_name;
} mca_common_sm_info_t;

/**
 * NOTE:
 * o array position dictates the default order in which
 *   the common shared memory components will be queried.
 * o first component successfully queried gets selected.
 * o entry format: {component availability, "component name,"}
 *   every name carries a trailing comma because
 *   mca_common_sm_param_register() concatenates the available names into
 *   sm_default and then strips the final comma.
 *
 * if you change the order of sm_avail_table below,
 * don't forget to update mca_common_sm_comp_index_map_t.
 *
 * placing mmap before sysv in the list prevents sysv from ever being selected
 * (in the default case).  this is because, at least for now, mmap's selection
 * query always succeeds.  that is, sysv must be explicitly requested.
 * NOTE: mmap is the default for now.
 *
 * {component availability, component name}
 */
static const mca_common_sm_info_t sm_avail_table[] = {
    {true                      , "mmap," }, /* assume mmap is always available */
    {(bool)MCA_COMMON_SM_POSIX, "posix,"},
    {(bool)MCA_COMMON_SM_SYSV , "sysv," },
    {false                     , NULL    }  /* MUST BE LAST ITEM */
};

/* component index enum -- values are listed in the same order as the
 * entries of sm_avail_table */
typedef enum {
    MCA_COMMON_SM_COMP_INDEX_MMAP = 0,
    MCA_COMMON_SM_COMP_INDEX_POSIX,
    MCA_COMMON_SM_COMP_INDEX_SYSV,
    MCA_COMMON_SM_COMP_INDEX_NONE /* MUST BE LAST ITEM */
} mca_common_sm_comp_index_map_t;

/**
 * list of RML messages that have arrived that have not yet been
 * consumed by the thread who is looking to complete its component
 * initialization based on the contents of the RML message.
 * constructed in mca_common_sm_init() and handed to
 * mca_common_sm_rml_info_bcast().
 */
static opal_list_t pending_rml_msgs;

/******************************************************************************/
/* STATIC UTILITY FUNCTIONS */
/******************************************************************************/

/******************************************************************************/
/**
 * this routine selects the common sm component that corresponds to
 * sm_component_index's value.
 *
 * @param sm_component_index index corresponding to the common sm component that
 *                           is to be selected.
(IN) */ static void select_common_sm_component(int sm_component_index) { switch (sm_component_index) { #if MCA_COMMON_SM_POSIX case MCA_COMMON_SM_COMP_INDEX_POSIX: sm_init = mca_common_sm_posix_init; sm_seg_alloc = mca_common_sm_posix_seg_alloc; sm_fini = mca_common_sm_posix_fini; break; #endif case MCA_COMMON_SM_COMP_INDEX_MMAP: #if !MCA_COMMON_SM_WINDOWS sm_init = mca_common_sm_mmap_init; sm_seg_alloc = mca_common_sm_mmap_seg_alloc; sm_fini = mca_common_sm_mmap_fini; #else /* MCA_COMMON_SM_WINDOWS */ sm_init = mca_common_sm_windows_init; sm_seg_alloc = mca_common_sm_windows_seg_alloc; sm_fini = mca_common_sm_windows_fini; #endif break; #if MCA_COMMON_SM_SYSV case MCA_COMMON_SM_COMP_INDEX_SYSV: sm_init = mca_common_sm_sysv_init; sm_seg_alloc = mca_common_sm_sysv_seg_alloc; sm_fini = mca_common_sm_sysv_fini; break; #endif case MCA_COMMON_SM_COMP_INDEX_NONE: sm_init = NULL; sm_seg_alloc = NULL; sm_fini = NULL; break; default: sm_init = NULL; sm_seg_alloc = NULL; sm_fini = NULL; opal_output(0, "WARNING: invalid common sm component index."); break; } } /******************************************************************************/ /** * this routine performs a series of run-time tests that determines whether or * not a particular common sm component can be selected safely. once a component * is successfully selected, its component index is returned. * * @return index corresponding to the selected common sm component. see * mca_common_sm_comp_index_map_t for valid values. 
*/ static int query_sm_components(void) { int help_msg_displayed = 0; int sm_component_index = MCA_COMMON_SM_COMP_INDEX_NONE; int i; if (NULL != sm_argv) { MCA_COMMON_SM_OUTPUT_VERBOSE("looking for available components"); for (i = 0; NULL != sm_argv[i]; ++i) { if (0 == strcasecmp(sm_argv[i], "posix")) { #if !MCA_COMMON_SM_POSIX if (!help_msg_displayed) { orte_show_help("help-mpi-common-sm.txt", "sm support", 1, sm_argv[i]); help_msg_displayed = 1; } #else /* MCA_COMMON_SM_POSIX */ MCA_COMMON_SM_OUTPUT_VERBOSE("querying posix"); /** * make sure that we can safely use posix sm on this system */ if (OMPI_SUCCESS == mca_common_sm_posix_component_query()) { MCA_COMMON_SM_OUTPUT_VERBOSE("selecting posix"); sm_component_index = MCA_COMMON_SM_COMP_INDEX_POSIX; break; } else /* let the user know that we tried posix and failed */ { MCA_COMMON_SM_OUTPUT_VERBOSE("cannot select posix"); orte_show_help("help-mpi-common-sm.txt", "sm rt test fail", 1, "Posix"); } #endif } else if (0 == strcasecmp(sm_argv[i], "mmap")) { MCA_COMMON_SM_OUTPUT_VERBOSE("selecting mmap"); /* there is no run-time test for mmap, so just select it */ sm_component_index = MCA_COMMON_SM_COMP_INDEX_MMAP; break; } else if (0 == strcasecmp(sm_argv[i], "sysv")) { #if !MCA_COMMON_SM_SYSV if (!help_msg_displayed) { orte_show_help("help-mpi-common-sm.txt", "sm support", 1, sm_argv[i]); help_msg_displayed = 1; } #else /* MCA_COMMON_SM_SYSV */ MCA_COMMON_SM_OUTPUT_VERBOSE("querying sysv"); /* SKG - disable sysv support when cr is enabled. * could presumably work properly someday. 
*/ #if OPAL_ENABLE_FT_CR == 1 if (!opal_cr_is_enabled) { #endif /* OPAL_ENABLE_FT_CR */ /* make sure that we can safely use sysv on this system */ if (OMPI_SUCCESS == mca_common_sm_sysv_component_query()) { MCA_COMMON_SM_OUTPUT_VERBOSE("selecting sysv"); sm_component_index = MCA_COMMON_SM_COMP_INDEX_SYSV; break; } else /* let the user know that we tried sysv and failed */ { MCA_COMMON_SM_OUTPUT_VERBOSE("cannot select sysv"); orte_show_help("help-mpi-common-sm.txt", "sm rt test fail", 1, "System V"); } #if OPAL_ENABLE_FT_CR == 1 } else { orte_show_help("help-mpi-common-sm.txt", "sysv with cr", 1); help_msg_displayed = 1; } #endif /* OPAL_ENABLE_FT_CR */ #endif } else /* unknown value */ { if (!help_msg_displayed) { orte_show_help("help-mpi-common-sm.txt", "sm support", 1, sm_argv[i]); help_msg_displayed = 1; } } } } if (MCA_COMMON_SM_COMP_INDEX_NONE == sm_component_index) { MCA_COMMON_SM_OUTPUT_VERBOSE("no component selected"); } return sm_component_index; } /******************************************************************************/ int mca_common_sm_param_register(mca_base_component_t *c) { if (++num_times_registered > 1) { return OMPI_SUCCESS; } if (num_times_registered < 1) { /* This should never happen -- programmer error */ return OMPI_ERROR; } /* also using sysv_index's value as an initialization flag */ if (-1 == sysv_index) { int i; char *last_char; memset(sm_default, '\0', sizeof(sm_default)); /* populate sm_default with all available common sm component names */ for (i = 0; NULL != sm_avail_table[i].sm_name; ++i) { if (sm_avail_table[i].avail) { strncat(sm_default, sm_avail_table[i].sm_name, sizeof(sm_default) - 1); } } /* remove the last comma from the char buff */ if (NULL != (last_char = strrchr(sm_default, ','))) { *last_char = '\0'; } /* set up help string */ snprintf( sm_avail_help_str, sizeof(sm_avail_help_str) - 1, "Which shared memory support will be used. Valid values: (%s)%s", sm_default, (i > 1) ? 
" - or a comma delimited combination of them " "(order dependent). The first component that is successfully " "selected is used." : "." ); sysv_index = mca_base_param_reg_int_name( "mpi", "common_sm_have_sysv_support", "Whether shared memory has System V support or not", false, true, MCA_COMMON_SM_SYSV, NULL ); posix_index = mca_base_param_reg_int_name( "mpi", "common_sm_have_posix_support", "Whether shared memory has POSIX support or not", false, true, MCA_COMMON_SM_POSIX, NULL ); } /* register mpi_common_sm */ common_sm_index = mca_base_param_reg_string_name("mpi", "common_sm", sm_avail_help_str, false, false, /* default value */ sm_default, NULL); /* also register MCA param synonyms for the component */ mca_base_param_reg_syn(sysv_index, c, "have_sysv_support", false); mca_base_param_reg_syn(posix_index, c, "have_posix_support", false); mca_base_param_reg_syn(common_sm_index, c, "store", false); /* Once the synonyms are registered, look up the value */ if (OPAL_SUCCESS != mca_base_param_lookup_string(common_sm_index, &sm_params)) { return OMPI_ERROR; } /* empty string == try all available */ if (0 == strcmp(sm_params, "")) { if (NULL == (sm_argv = opal_argv_split(sm_default, ','))) { opal_output(0, "WARNING: could not parse mpi_common_sm request."); } } /* try what the user specified */ else { if (NULL == (sm_argv = opal_argv_split(sm_params, ','))) { opal_output(0, "WARNING: could not parse mpi_common_sm request."); } } free(sm_params); return OMPI_SUCCESS; } /******************************************************************************/ int mca_common_sm_param_unregister(mca_base_component_t *c) { if (--num_times_registered > 0) { return OMPI_SUCCESS; } if (num_times_registered < 0) { /* This should never happen -- programmer error */ return OMPI_ERROR; } if (NULL != sm_argv) { opal_argv_free(sm_argv); sm_argv = NULL; } return OMPI_SUCCESS; } /******************************************************************************/ mca_common_sm_module_t * 
mca_common_sm_init(ompi_proc_t **procs, size_t num_procs, size_t size, char *file_name, size_t size_ctl_structure, size_t data_seg_alignment) { size_t num_local_procs = 0; bool found_lowest = false; bool lowest; size_t p; ompi_proc_t *temp_proc; /** * NOTE: the selected component's init routine, unlike mca_common_sm_init, * must be provided with: * o a SORTED procs array * o the number of LOCAL processes within procs array * * so always do the following before calling sm_init: * o reorder procs array to have all the local procs at the beginning. * o look for the local proc with the lowest name. * o determine the number of local procs. * o ensure that procs[0] is the lowest named process. */ for (p = 0; p < num_procs; ++p) { if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags)) { /* if we don't have a lowest, save the first one */ if (!found_lowest) { procs[0] = procs[p]; found_lowest = true; } else { /* save this proc */ procs[num_local_procs] = procs[p]; /** * if we have a new lowest, swap it with position 0 * so that procs[0] is always the lowest named proc */ if (orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &(procs[p]->proc_name), &(procs[0]->proc_name)) < 0) { temp_proc = procs[0]; procs[0] = procs[p]; procs[num_local_procs] = temp_proc; } } /** * regardless of the comparisons above, we found * another proc on the local node, so increment */ ++num_local_procs; } } /* if there is less than 2 local processes, there's nothing to do. */ if (num_local_procs < 2) { return NULL; } if (!initialized) { mca_common_sm_rml_sm_info_t sm_info; sm_info.id = MCA_COMMON_SM_COMP_INDEX_NONE; memset(sm_info.posix_fname_buff, '\0', OMPI_COMMON_SM_POSIX_FILE_LEN_MAX); lowest = (0 == orte_util_compare_name_fields( ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, &(procs[0]->proc_name))); /** * lock here to prevent multiple threads from invoking this function * simultaneously. the critical section we're protecting is usage of * the RML in this block. 
*/ opal_mutex_lock(&mutex); OBJ_CONSTRUCT(&(pending_rml_msgs), opal_list_t); /** * figure out if i am the lowest proc in the group. * if i am, select a common sm component and send its index to the rest * of the local procs so they can select the same common sm component. */ if (lowest) { /* get the component index */ sm_info.id = query_sm_components(); } /* no return code check here because the error * path is the same as the expected path */ mca_common_sm_rml_info_bcast(&sm_info, procs, num_local_procs, OMPI_RML_TAG_COMMON_SM_COMP_INDEX, lowest, file_name, &(pending_rml_msgs)); opal_mutex_unlock(&mutex); select_common_sm_component(sm_info.id); initialized = true; } if (NULL != sm_init) { /* notice that we are passing a SORTED procs array to the selected * component along with the number of LOCAL processes found within * procs. */ return sm_init(procs, num_local_procs, size, file_name, size_ctl_structure, data_seg_alignment); } return NULL; } /******************************************************************************/ /** * This routine is the same as mca_common_sm_mmap_init() except that * it takes an (ompi_group_t *) parameter to specify the peers rather * than an array of procs. Unlike mca_common_sm_mmap_init(), the * group must contain *only* local peers, or this function will return * NULL and not create any shared memory segment. */ mca_common_sm_module_t * mca_common_sm_init_group(ompi_group_t *group, size_t size, char *file_name, size_t size_ctl_structure, size_t data_seg_alignment) { mca_common_sm_module_t *ret = NULL; ompi_proc_t **procs = NULL; /* make sure sm_init has been properly initialized. do this because * sm_init_group only does prep work before passing along the real work to * sm_init. 
*/ if (NULL != sm_init) { size_t i; size_t group_size; ompi_proc_t *proc; /* if there is less than 2 procs, there's nothing to do */ if ((group_size = ompi_group_size(group)) < 2) { goto out; } if (NULL == (procs = (ompi_proc_t **) malloc(sizeof(ompi_proc_t *) * group_size))) { ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE); goto out; } /* make sure that all the procs in the group are local */ for (i = 0; i < group_size; ++i) { proc = ompi_group_peer_lookup(group, i); if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) { goto out; } procs[i] = proc; } /* let sm_init take care of the rest ... */ ret = sm_init(procs, group_size, size, file_name, size_ctl_structure, data_seg_alignment); } out: if (NULL != procs) { free(procs); } return ret; } /******************************************************************************/ void * mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, size_t *size, mca_mpool_base_registration_t **registration) { if (NULL != sm_seg_alloc) { return sm_seg_alloc(mpool, size, registration); } return NULL; } /******************************************************************************/ int mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module) { if (NULL != sm_fini && NULL != mca_common_sm_module) { return sm_fini(mca_common_sm_module); } return OMPI_ERR_NOT_FOUND; }