From 6188d97e1a7a8bb9937585dd9600e16a306158df Mon Sep 17 00:00:00 2001 From: Samuel Gutierrez <samuel@lanl.gov> Date: Fri, 10 Aug 2012 22:20:38 +0000 Subject: [PATCH] Getting out of bed this morning was a bad idea... Reverting the sm update once more because it breaks direct launch. Will address this issue and commit the update once it has all been tested. Sorry everyone! This commit was SVN r27001. --- ompi/mca/btl/sm/btl_sm.c | 256 +++++-------- ompi/mca/btl/sm/btl_sm.h | 14 +- ompi/mca/btl/sm/btl_sm_component.c | 351 ++---------------- ompi/mca/btl/sm/help-mpi-btl-sm.txt | 6 - ompi/mca/common/sm/common_sm.c | 94 ++--- ompi/mca/common/sm/common_sm.h | 45 +-- ompi/mca/mpool/sm/mpool_sm.h | 17 +- ompi/mca/mpool/sm/mpool_sm_component.c | 111 +++--- opal/mca/shmem/mmap/shmem_mmap_module.c | 9 +- opal/mca/shmem/posix/shmem_posix_module.c | 9 +- opal/mca/shmem/sysv/shmem_sysv_module.c | 9 +- opal/mca/shmem/windows/shmem_windows_module.c | 9 +- 12 files changed, 248 insertions(+), 682 deletions(-) diff --git a/ompi/mca/btl/sm/btl_sm.c b/ompi/mca/btl/sm/btl_sm.c index b3bf68edd2..21c55e1e82 100644 --- a/ompi/mca/btl/sm/btl_sm.c +++ b/ompi/mca/btl/sm/btl_sm.c @@ -43,12 +43,9 @@ #include "opal/util/output.h" #include "opal/util/printf.h" #include "opal/mca/hwloc/base/base.h" -#include "opal/mca/shmem/base/base.h" -#include "opal/mca/shmem/shmem.h" #include "orte/util/proc_info.h" #include "opal/datatype/opal_convertor.h" #include "ompi/class/ompi_free_list.h" -#include "ompi/runtime/ompi_module_exchange.h" #include "ompi/mca/btl/btl.h" #include "ompi/mca/mpool/base/base.h" #include "ompi/mca/mpool/sm/mpool_sm.h" @@ -114,6 +111,7 @@ mca_btl_sm_t mca_btl_sm = { */ #define OFFSET2ADDR(OFFSET, BASE) ((ptrdiff_t)(OFFSET) + (char*)(BASE)) + static void *mpool_calloc(size_t nmemb, size_t size) { void *buf; @@ -129,83 +127,17 @@ static void *mpool_calloc(size_t nmemb, size_t size) return buf; } -/* - * Returns a pointer to node rank zero. Returns NULL on error. - */ -static ompi_proc_t * -get_node_rank_zero_proc_ptr(ompi_proc_t **proc_world, - size_t proc_world_size) + +static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) { - size_t num_local_procs = 0; - - if (NULL == proc_world) { - return NULL; - } - /* sort the procs list and get a pointer to the lowest node rank */ - if (OMPI_SUCCESS != mca_common_sm_local_proc_reorder(proc_world, - proc_world_size, - &num_local_procs)) { - opal_output(0, "mca_common_sm_local_proc_reorder failure! " - "Cannot continue.\n"); - return NULL; - } - - return proc_world[0]; -} - -/* - * Modex receive. Caller is responsible for freeing returned resources. - */ -static inline int -recv_modex(mca_btl_sm_component_t *comp_ptr, - mca_btl_sm_modex_t **out_modex) -{ - ompi_proc_t **proc_world = NULL; - ompi_proc_t *proc_node_rank_zero = NULL; - size_t proc_world_size = 0; - size_t modex_size = 0; - int rc = OMPI_SUCCESS; - - if (NULL == (proc_world = ompi_proc_world(&proc_world_size))) { - opal_output(0, "ompi_proc_world failure! Cannot continue.\n"); - rc = OMPI_ERROR; - goto out; - } - if (NULL == (proc_node_rank_zero = - get_node_rank_zero_proc_ptr(proc_world, proc_world_size))) { - opal_output(0, "get_node_rank_zero_proc_ptr failure! " - "Cannot continue.\n"); - rc = OMPI_ERROR; - goto out; - } - if (OMPI_SUCCESS != (rc = - ompi_modex_recv(&comp_ptr->super.btl_version, - proc_node_rank_zero, - (void **)out_modex, - &modex_size))) { - opal_output(0, "recv_modex: ompi_modex_recv failure!\n"); - /* rc is set */ - goto out; - } - -out: - if (NULL != proc_world) { - free(proc_world); - } - return rc; -} - -static int -sm_btl_first_time_init(mca_btl_sm_t *sm_btl, - int32_t my_smp_rank, - int n) -{ - size_t length, length_payload; + size_t size, length, length_payload; + char *sm_ctl_file; sm_fifo_t *my_fifos; - int my_mem_node, num_mem_nodes, i, rc; - mca_mpool_base_resources_t *res = NULL; + int my_mem_node, num_mem_nodes, i; + ompi_proc_t **procs; + size_t num_procs; + mca_mpool_base_resources_t res; mca_btl_sm_component_t* m = &mca_btl_sm_component; - mca_btl_sm_modex_t *modex = NULL; /* Assume we don't have hwloc support and fill in dummy info */ mca_btl_sm_component.mem_node = my_mem_node = 0; @@ -258,42 +190,50 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, } #endif - if (NULL == (res = calloc(1, sizeof(*res)))) { + /* lookup shared memory pool */ + mca_btl_sm_component.sm_mpools = (mca_mpool_base_module_t **) calloc(num_mem_nodes, + sizeof(mca_mpool_base_module_t*)); + + /* Disable memory binding, because each MPI process will claim + pages in the mpool for their local NUMA node */ + res.mem_node = -1; + + /* determine how much memory to create */ + /* + * This heuristic formula mostly says that we request memory for: + * - nfifos FIFOs, each comprising: + * . a sm_fifo_t structure + * . many pointers (fifo_size of them per FIFO) + * - eager fragments (2*n of them, allocated in sm_free_list_inc chunks) + * - max fragments (sm_free_list_num of them) + * + * On top of all that, we sprinkle in some number of + * "opal_cache_line_size" additions to account for some + * padding and edge effects that may lie in the allocator. + */ + res.size = + FIFO_MAP_NUM(n) * ( sizeof(sm_fifo_t) + sizeof(void *) * m->fifo_size + 4 * opal_cache_line_size ) + + ( 2 * n + m->sm_free_list_inc ) * ( m->eager_limit + 2 * opal_cache_line_size ) + + m->sm_free_list_num * ( m->max_frag_size + 2 * opal_cache_line_size ); + + /* before we multiply by n, make sure the result won't overflow */ + /* Stick that little pad in, particularly since we'll eventually + * need a little extra space. E.g., in mca_mpool_sm_init() in + * mpool_sm_component.c when sizeof(mca_common_sm_module_t) is + * added. + */ + if ( ((double) res.size) * n > LONG_MAX - 4096 ) { return OMPI_ERR_OUT_OF_RESOURCE; } - /* everyone receive modex information. all but node rank zero attach to the - * segments stored within the modex. remember: node rank zero is already - * attached to sm_seg. */ - if (OMPI_SUCCESS != (rc = recv_modex(m, &modex))) { - free(res); - return rc; - } - /* lookup shared memory pool */ - mca_btl_sm_component.sm_mpools = - (mca_mpool_base_module_t **)calloc(num_mem_nodes, - sizeof(mca_mpool_base_module_t *)); - - /* Disable memory binding, because each MPI process will claim pages in the - * mpool for their local NUMA node */ - res->mem_node = -1; - res->size = modex->mpool_res_size; - - if (OPAL_SUCCESS != - opal_shmem_ds_copy(&(modex->sm_mpool_meta_buf), - &(res->bs_meta_buf))) { - free(res); - free(modex); - return OMPI_ERROR; - } - + res.size *= n; + + /* now, create it */ mca_btl_sm_component.sm_mpools[0] = mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name, - sm_btl, res); + sm_btl, &res); /* Sanity check to ensure that we found it */ if (NULL == mca_btl_sm_component.sm_mpools[0]) { - free(res); - free(modex); - return OMPI_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_OUT_OF_RESOURCE; } mca_btl_sm_component.sm_mpool = mca_btl_sm_component.sm_mpools[0]; @@ -305,27 +245,37 @@ sm_btl_first_time_init(mca_btl_sm_t *sm_btl, mca_btl_sm_component.sm_peers = (struct mca_btl_base_endpoint_t**) calloc(n, sizeof(struct mca_btl_base_endpoint_t*)); if (NULL == mca_btl_sm_component.sm_peers) { - free(res); - free(modex); return OMPI_ERR_OUT_OF_RESOURCE; } - if (0 != my_smp_rank) { - if (NULL == (mca_btl_sm_component.sm_seg = - mca_common_sm_module_attach(&modex->sm_meta_buf, - sizeof(mca_common_sm_seg_header_t), - opal_cache_line_size))) { - /* don't have to detach here, because module_attach cleans up after - * itself on failure. */ - opal_output(0, "sm_btl_first_time_init: " - "mca_common_sm_module_attach failure!\n"); - free(modex); - free(res); - return OMPI_ERROR; - } + + /* Allocate Shared Memory BTL process coordination + * data structure. This will reside in shared memory */ + + /* set file name */ + if (asprintf(&sm_ctl_file, "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s", + orte_process_info.job_session_dir, + orte_process_info.nodename) < 0) { + return OMPI_ERR_OUT_OF_RESOURCE; } - /* it is now safe to free the modex and the mpool resources */ - free(modex); - free(res); + + /* Pass in a data segment alignment of 0 to get no data + segment (only the shared control structure) */ + size = sizeof(mca_common_sm_seg_header_t) + + n * (sizeof(sm_fifo_t*) + sizeof(char *) + sizeof(uint16_t)) + opal_cache_line_size; + procs = ompi_proc_world(&num_procs); + if (!(mca_btl_sm_component.sm_seg = + mca_common_sm_init(procs, num_procs, size, sm_ctl_file, + sizeof(mca_common_sm_seg_header_t), + opal_cache_line_size))) { + opal_output(0, "mca_btl_sm_add_procs: unable to create shared memory " + "BTL coordinating strucure :: size %lu \n", + (unsigned long)size); + free(procs); + free(sm_ctl_file); + return OMPI_ERROR; + } + free(procs); + free(sm_ctl_file); /* check to make sure number of local procs is within the * specified limits */ @@ -424,7 +374,6 @@ static struct mca_btl_base_endpoint_t * create_sm_endpoint(int local_proc, struct ompi_proc_t *proc) { struct mca_btl_base_endpoint_t *ep; - #if OMPI_ENABLE_PROGRESS_THREADS == 1 char path[PATH_MAX]; #endif @@ -452,6 +401,22 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc) return ep; } +static void calc_sm_max_procs(int n) +{ + /* see if need to allocate space for extra procs */ + if(0 > mca_btl_sm_component.sm_max_procs) { + /* no limit */ + if(0 <= mca_btl_sm_component.sm_extra_procs) { + /* limit */ + mca_btl_sm_component.sm_max_procs = + n + mca_btl_sm_component.sm_extra_procs; + } else { + /* no limit */ + mca_btl_sm_component.sm_max_procs = 2 * n; + } + } +} + int mca_btl_sm_add_procs( struct mca_btl_base_module_t* btl, size_t nprocs, @@ -465,9 +430,6 @@ int mca_btl_sm_add_procs( mca_btl_sm_t *sm_btl; bool have_connected_peer = false; char **bases; - /* for easy access to the mpool_sm_module */ - mca_mpool_sm_module_t *sm_mpool_modp = NULL; - /* initializion */ sm_btl = (mca_btl_sm_t *)btl; @@ -480,7 +442,7 @@ int mca_btl_sm_add_procs( * and idetify procs that are on this host. Add procs on this * host to shared memory reachbility list. Also, get number * of local procs in the procs list. */ - for (proc = 0; proc < (int32_t)nprocs; proc++) { + for(proc = 0; proc < (int32_t)nprocs; proc++) { /* check to see if this proc can be reached via shmem (i.e., if they're on my local host and in my job) */ if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid || @@ -515,18 +477,18 @@ int mca_btl_sm_add_procs( goto CLEANUP; /* make sure that my_smp_rank has been defined */ - if (-1 == my_smp_rank) { + if(-1 == my_smp_rank) { return_code = OMPI_ERROR; goto CLEANUP; } + calc_sm_max_procs(n_local_procs); + if (!sm_btl->btl_inited) { return_code = - sm_btl_first_time_init(sm_btl, my_smp_rank, - mca_btl_sm_component.sm_max_procs); - if (return_code != OMPI_SUCCESS) { + sm_btl_first_time_init(sm_btl, mca_btl_sm_component.sm_max_procs); + if(return_code != OMPI_SUCCESS) goto CLEANUP; - } } /* set local proc's smp rank in the peers structure for @@ -539,7 +501,6 @@ int mca_btl_sm_add_procs( } bases = mca_btl_sm_component.shm_bases; - sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_sm_component.sm_mpool; /* initialize own FIFOs */ /* @@ -563,34 +524,13 @@ int mca_btl_sm_add_procs( /* Sync with other local procs. Force the FIFO initialization to always * happens before the readers access it. */ - opal_atomic_add_32(&mca_btl_sm_component.sm_seg->module_seg->seg_inited, 1); + opal_atomic_add_32( &mca_btl_sm_component.sm_seg->module_seg->seg_inited, 1); while( n_local_procs > mca_btl_sm_component.sm_seg->module_seg->seg_inited) { opal_progress(); opal_atomic_rmb(); } - /* it is now safe to unlink the shared memory segment. only one process - * needs to do this, so just let smp rank zero take care of it. */ - if (0 == my_smp_rank) { - if (OMPI_SUCCESS != - mca_common_sm_module_unlink(mca_btl_sm_component.sm_seg)) { - /* it is "okay" if this fails at this point. we have gone this far, - * so just warn about the failure and continue. this is probably - * only triggered by a programming error. */ - opal_output(0, "WARNING: common_sm_module_unlink failed.\n"); - } - /* SKG - another abstraction violation here, but I don't want to add - * extra code in the sm mpool for further synchronization. */ - - /* at this point, all processes have attached to the mpool segment. so - * it is safe to unlink it here. */ - if (OMPI_SUCCESS != - mca_common_sm_module_unlink(sm_mpool_modp->sm_common_module)) { - opal_output(0, "WARNING: common_sm_module_unlink failed.\n"); - } - } - /* coordinate with other processes */ for(j = mca_btl_sm_component.num_smp_procs; j < mca_btl_sm_component.num_smp_procs + n_local_procs; j++) { diff --git a/ompi/mca/btl/sm/btl_sm.h b/ompi/mca/btl/sm/btl_sm.h index 1d37ccca36..2a02f543b4 100644 --- a/ompi/mca/btl/sm/btl_sm.h +++ b/ompi/mca/btl/sm/btl_sm.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * $COPYRIGHT$ @@ -42,8 +42,6 @@ #include "opal/util/bit_ops.h" #include "opal/class/opal_free_list.h" -#include "opal/mca/shmem/shmem.h" - #include "ompi/mca/btl/btl.h" #include "ompi/mca/common/sm/common_sm.h" @@ -123,16 +121,6 @@ typedef struct mca_btl_sm_mem_node_t { mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */ } mca_btl_sm_mem_node_t; -/** - * Shared Memory (SM) BTL modex. - */ -struct mca_btl_sm_modex_t { - opal_shmem_ds_t sm_meta_buf; - opal_shmem_ds_t sm_mpool_meta_buf; - size_t mpool_res_size; -}; -typedef struct mca_btl_sm_modex_t mca_btl_sm_modex_t; - /** * Shared Memory (SM) BTL module. */ diff --git a/ompi/mca/btl/sm/btl_sm_component.c b/ompi/mca/btl/sm/btl_sm_component.c index 42d1d8955b..bab8487dfe 100644 --- a/ompi/mca/btl/sm/btl_sm_component.c +++ b/ompi/mca/btl/sm/btl_sm_component.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved. @@ -42,20 +42,15 @@ #include <sys/stat.h> /* for mkfifo */ #endif /* HAVE_SYS_STAT_H */ +#include "ompi/constants.h" #include "opal/mca/event/event.h" -#include "opal/mca/base/mca_base_param.h" -#include "opal/mca/shmem/base/base.h" -#include "opal/mca/shmem/shmem.h" #include "opal/util/bit_ops.h" #include "opal/util/output.h" - #include "orte/util/proc_info.h" #include "orte/util/show_help.h" #include "orte/runtime/orte_globals.h" -#include "orte/util/proc_info.h" -#include "ompi/constants.h" -#include "ompi/runtime/ompi_module_exchange.h" +#include "opal/mca/base/mca_base_param.h" #include "ompi/mca/mpool/base/base.h" #include "ompi/mca/common/sm/common_sm.h" #include "ompi/mca/btl/base/btl_base_error.h" @@ -356,354 +351,52 @@ CLEANUP: return return_value; } -/* - * Returns the number of processes on the node. - */ -static inline int -get_num_local_procs(void) -{ - /* num_local_peers does not include us in - * its calculation, so adjust for that */ - return (int)(1 + orte_process_info.num_local_peers); -} - -static void -calc_sm_max_procs(int n) -{ - /* see if need to allocate space for extra procs */ - if (0 > mca_btl_sm_component.sm_max_procs) { - /* no limit */ - if (0 <= mca_btl_sm_component.sm_extra_procs) { - /* limit */ - mca_btl_sm_component.sm_max_procs = - n + mca_btl_sm_component.sm_extra_procs; - } else { - /* no limit */ - mca_btl_sm_component.sm_max_procs = 2 * n; - } - } -} - -static int -create_and_attach(mca_btl_sm_component_t *comp_ptr, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment, - mca_common_sm_module_t **out_modp) - -{ - if (NULL == (*out_modp = - mca_common_sm_module_create_and_attach(size, file_name, - size_ctl_structure, - data_seg_alignment))) { - opal_output(0, "create_and_attach: unable to create shared memory " - "BTL coordinating strucure :: size %lu \n", - (unsigned long)size); - return OMPI_ERROR; - } - return OMPI_SUCCESS; -} - -/* - * SKG - I'm not happy with this, but I can't figure out a better way of - * finding the sm mpool's minimum size 8-|. The way I see it. This BTL only - * uses the sm mpool, so maybe this isn't so bad... - * - * The problem is the we need to size the mpool resources at sm BTL component - * init. That means we need to know the mpool's minimum size at create. - */ -static int -get_min_mpool_size(mca_btl_sm_component_t *comp_ptr, - size_t *out_size) -{ - char *type_name = "mpool"; - char *param_name = "min_size"; - char *min_size = NULL; - int id = 0; - size_t default_min = 67108864; - size_t size = 0; - long tmp_size = 0; - - if (0 > (id = mca_base_param_find(type_name, comp_ptr->sm_mpool_name, - param_name))) { - opal_output(0, "mca_base_param_find: failure looking for %s_%s_%s\n", - type_name, comp_ptr->sm_mpool_name, param_name); - return OMPI_ERR_NOT_FOUND; - } - if (OPAL_ERROR == mca_base_param_lookup_string(id, &min_size)) { - opal_output(0, "mca_base_param_lookup_string failure\n"); - return OMPI_ERROR; - } - errno = 0; - tmp_size = strtol(min_size, (char **)NULL, 10); - if (ERANGE == errno || EINVAL == errno || tmp_size <= 0) { - opal_output(0, "mca_btl_sm::get_min_mpool_size: " - "Unusable %s_%s_min_size provided. " - "Continuing with %lu.", type_name, - comp_ptr->sm_mpool_name, - (unsigned long)default_min); - - size = default_min; - } - else { - size = (size_t)tmp_size; - } - free(min_size); - *out_size = size; - return OMPI_SUCCESS; -} - -static int -get_mpool_res_size(int32_t max_procs, - size_t *out_res_size) -{ - size_t size = 0; - /* determine how much memory to create */ - /* - * This heuristic formula mostly says that we request memory for: - * - nfifos FIFOs, each comprising: - * . a sm_fifo_t structure - * . many pointers (fifo_size of them per FIFO) - * - eager fragments (2*n of them, allocated in sm_free_list_inc chunks) - * - max fragments (sm_free_list_num of them) - * - * On top of all that, we sprinkle in some number of - * "opal_cache_line_size" additions to account for some - * padding and edge effects that may lie in the allocator. - */ - size = FIFO_MAP_NUM(max_procs) * - (sizeof(sm_fifo_t) + sizeof(void *) * - mca_btl_sm_component.fifo_size + 4 * opal_cache_line_size) + - (2 * max_procs + mca_btl_sm_component.sm_free_list_inc) * - (mca_btl_sm_component.eager_limit + 2 * opal_cache_line_size) + - mca_btl_sm_component.sm_free_list_num * - (mca_btl_sm_component.max_frag_size + 2 * opal_cache_line_size); - - /* add something for the control structure */ - size += sizeof(mca_common_sm_module_t); - - /* before we multiply by max_procs, make sure the result won't overflow */ - /* Stick that little pad in, particularly since we'll eventually - * need a little extra space. E.g., in mca_mpool_sm_init() in - * mpool_sm_component.c when sizeof(mca_common_sm_module_t) is - * added. - */ - if (((double)size) * max_procs > LONG_MAX - 4096) { - return OMPI_ERR_VALUE_OUT_OF_BOUNDS; - } - size *= (size_t)max_procs; - *out_res_size = size; - return OMPI_SUCCESS; -} - -/* - * Creates the shared-memory segments required for this BTL. One for the sm - * mpool and another for the shared memory store and populates *modex_buf_ptr. - * - * it is assumed that calc_sm_max_procs has already been called (sets - * sm_max_procs). - */ -static int -populate_modex_bufp(mca_btl_sm_component_t *comp_ptr, - mca_btl_sm_modex_t *modex_buf_ptr) -{ - int rc = OMPI_SUCCESS; - size_t size = 0; - size_t min_size = 0; - char *sm_mpool_ctl_file = NULL; - char *sm_ctl_file = NULL; - /* used as a temporary store so we can extract shmem_ds info */ - mca_common_sm_module_t *tmp_modp = NULL; - - /* first generate some unique paths for the shared-memory segments that - * this BTL needs. */ - if (asprintf(&sm_mpool_ctl_file, - "%s"OPAL_PATH_SEP"shared_mem_pool.%s", - orte_process_info.job_session_dir, - orte_process_info.nodename) < 0) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto out; - } - if (asprintf(&sm_ctl_file, - "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s", - orte_process_info.job_session_dir, - orte_process_info.nodename) < 0) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto out; - } - - /* create the things */ - - /* === sm mpool == */ - /* get the segment size for the sm mpool. */ - if (OMPI_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs, - &size))) { - /* rc is already set */ - goto out; - } - /* do we need to update the size based on the sm mpool's min size? */ - if (OMPI_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) { - goto out; - } - if (size < min_size) { - size = min_size; - } - /* we only need the shmem_ds info at this point. initilization will be - * completed in the mpool module code. the idea is that we just need this - * info so we can populate the modex. */ - if (OMPI_SUCCESS != (rc = - create_and_attach(comp_ptr, size, sm_mpool_ctl_file, - sizeof(mca_common_sm_module_t), 8, &tmp_modp))) { - /* rc is set */ - goto out; - } - /* now extract and store the shmem_ds info from the returned module */ - if (OPAL_SUCCESS != - opal_shmem_ds_copy(&(tmp_modp->shmem_ds), - &(modex_buf_ptr->sm_mpool_meta_buf))) { - rc = OMPI_ERROR; - goto out; - } - /* set the mpool_res_size in the modex */ - modex_buf_ptr->mpool_res_size = size; - - /* === sm btl == */ - /* calculate the segment size. */ - size = sizeof(mca_common_sm_seg_header_t) + - comp_ptr->sm_max_procs * - (sizeof(sm_fifo_t *) + - sizeof(char *) + sizeof(uint16_t)) + - opal_cache_line_size; - - if (OMPI_SUCCESS != (rc = - create_and_attach(comp_ptr, size, sm_ctl_file, - sizeof(mca_common_sm_seg_header_t), - opal_cache_line_size, &comp_ptr->sm_seg))) { - /* rc is set */ - goto out; - } - /* now extract and store the shmem_ds info from the returned module */ - if (OPAL_SUCCESS != opal_shmem_ds_copy(&(comp_ptr->sm_seg->shmem_ds), - &(modex_buf_ptr->sm_meta_buf))) { - rc = OMPI_ERROR; - goto out; - } - -out: - if (NULL != sm_mpool_ctl_file) { - free(sm_mpool_ctl_file); - } - if (NULL != sm_ctl_file) { - free(sm_ctl_file); - } - return rc; -} - -/* - * Creates information required for the sm modex and modex sends it. - */ -static int -send_modex(mca_btl_sm_component_t *comp_ptr) -{ - int rc = OMPI_SUCCESS; - mca_btl_sm_modex_t *sm_modex = NULL; - - if (NULL == (sm_modex = calloc(1, sizeof(*sm_modex)))) { - /* out of resources, so just bail. */ - return OMPI_ERR_OUT_OF_RESOURCE; - } - if (OMPI_SUCCESS != (rc = populate_modex_bufp(comp_ptr, sm_modex))) { - opal_output(0, "send_modex: populate_modex_bufp failure!\n"); - /* rc is set */ - goto out; - } - /* send the modex */ - rc = ompi_modex_send(&comp_ptr->super.btl_version, sm_modex, - sizeof(*sm_modex)); - -out: - if (NULL != sm_modex) { - free(sm_modex); - } - return rc; -} - /* * SM component initialization */ -static mca_btl_base_module_t ** -mca_btl_sm_component_init(int *num_btls, - bool enable_progress_threads, - bool enable_mpi_threads) +static mca_btl_base_module_t** mca_btl_sm_component_init( + int *num_btls, + bool enable_progress_threads, + bool enable_mpi_threads) { - int num_local_procs = 0; mca_btl_base_module_t **btls = NULL; - orte_node_rank_t my_node_rank = ORTE_NODE_RANK_INVALID; #if OMPI_BTL_SM_HAVE_KNEM int rc; #endif *num_btls = 0; + + /* if no session directory was created, then we cannot be used */ + if (!orte_create_session_dirs) { + return NULL; + } + /* lookup/create shared memory pool only when used */ mca_btl_sm_component.sm_mpool = NULL; mca_btl_sm_component.sm_mpool_base = NULL; - /* if no session directory was created, then we cannot be used */ - /* SKG - this isn't true anymore. Some backing facilities don't require a - * file-backed store. Extend shmem to provide this info one day. */ - if (!orte_create_session_dirs) { - return NULL; - } - /* if we don't have locality information, then we cannot be used */ - if (ORTE_NODE_RANK_INVALID == - (my_node_rank = orte_process_info.my_node_rank)) { - orte_show_help("help-mpi-btl-sm.txt", "no locality", true); - return NULL; - } - /* no use trying to use sm with less than two procs, so just bail. */ - if ((num_local_procs = get_num_local_procs()) < 2) { - return NULL; - } - /* calculate max procs so we can figure out how large to make the - * shared-memory segment. this routine sets component sm_max_procs. */ - calc_sm_max_procs(num_local_procs); - /* let local rank 0 create the shared-memory segments and send shmem info */ - if (0 == my_node_rank) { - if (OMPI_SUCCESS != send_modex(&mca_btl_sm_component)) { - return NULL; - } - } - #if OMPI_ENABLE_PROGRESS_THREADS == 1 /* create a named pipe to receive events */ - sprintf(mca_btl_sm_component.sm_fifo_path, - "%s"OPAL_PATH_SEP"sm_fifo.%lu", - orte_process_info.job_session_dir, - (unsigned long)ORTE_PROC_MY_NAME->vpid); - if (mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) { - opal_output(0, "mca_btl_sm_component_init: " - "mkfifo failed with errno=%d\n",errno); + sprintf( mca_btl_sm_component.sm_fifo_path, + "%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir, + (unsigned long)ORTE_PROC_MY_NAME->vpid ); + if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) { + opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno); return NULL; } - mca_btl_sm_component.sm_fifo_fd = open(mca_btl_sm_component.sm_fifo_path, - O_RDWR); + mca_btl_sm_component.sm_fifo_fd = open(mca_btl_sm_component.sm_fifo_path, O_RDWR); if(mca_btl_sm_component.sm_fifo_fd < 0) { - opal_output(0, "mca_btl_sm_component_init: " - "open(%s) failed with errno=%d\n", + opal_output(0, "mca_btl_sm_component_init: open(%s) failed with errno=%d\n", mca_btl_sm_component.sm_fifo_path, errno); return NULL; } OBJ_CONSTRUCT(&mca_btl_sm_component.sm_fifo_thread, opal_thread_t); - mca_btl_sm_component.sm_fifo_thread.t_run = - (opal_thread_fn_t)mca_btl_sm_component_event_thread; + mca_btl_sm_component.sm_fifo_thread.t_run = (opal_thread_fn_t) mca_btl_sm_component_event_thread; opal_thread_start(&mca_btl_sm_component.sm_fifo_thread); #endif - mca_btl_sm_component.sm_btls = - (mca_btl_sm_t **)malloc(mca_btl_sm_component.sm_max_btls * - sizeof(mca_btl_sm_t *)); + mca_btl_sm_component.sm_btls = (mca_btl_sm_t **) malloc( mca_btl_sm_component.sm_max_btls * sizeof (mca_btl_sm_t *)); if (NULL == mca_btl_sm_component.sm_btls) { return NULL; } diff --git a/ompi/mca/btl/sm/help-mpi-btl-sm.txt b/ompi/mca/btl/sm/help-mpi-btl-sm.txt index 9d868e8445..b6905097d6 100644 --- a/ompi/mca/btl/sm/help-mpi-btl-sm.txt +++ b/ompi/mca/btl/sm/help-mpi-btl-sm.txt @@ -4,8 +4,6 @@ # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012 Los Alamos National Security, LLC. -# All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -14,10 +12,6 @@ # # This is the US/English help file for Open MPI's shared memory support. # -[no locality] -WARNING: Missing locality information required for sm initialization. -Continuing without shared memory support. -# [knem requested but not supported] WARNING: Linux kernel knem support was requested for the shared memory (sm) BTL, but it is not supported. Deactivating the shared memory diff --git a/ompi/mca/common/sm/common_sm.c b/ompi/mca/common/sm/common_sm.c index 74dfdead43..889d9c3faa 100644 --- a/ompi/mca/common/sm/common_sm.c +++ b/ompi/mca/common/sm/common_sm.c @@ -42,7 +42,6 @@ #include "opal/align.h" #include "opal/util/argv.h" -#include "opal/mca/shmem/shmem.h" #if OPAL_ENABLE_FT_CR == 1 #include "opal/runtime/opal_cr.h" #endif @@ -134,7 +133,7 @@ attach_and_init(opal_shmem_ds_t *shmem_bufp, map->module_data_addr = addr; map->module_seg_addr = (unsigned char *)seg; - + /* note that size is only used during the first call */ if (first_call) { /* initialize some segment information */ @@ -158,20 +157,20 @@ attach_and_init(opal_shmem_ds_t *shmem_bufp, } /* ////////////////////////////////////////////////////////////////////////// */ -/* api implementation */ +/* api implementation */ /* ////////////////////////////////////////////////////////////////////////// */ /* ////////////////////////////////////////////////////////////////////////// */ mca_common_sm_module_t * -mca_common_sm_module_create_and_attach(size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment) +mca_common_sm_module_create(size_t size, + char *file_name, + size_t size_ctl_structure, + size_t data_seg_alignment) { mca_common_sm_module_t *map = NULL; opal_shmem_ds_t *seg_meta = NULL; - if (NULL == (seg_meta = (opal_shmem_ds_t *)malloc(sizeof(*seg_meta)))) { + if (NULL == (seg_meta = (opal_shmem_ds_t *) malloc(sizeof(*seg_meta)))) { /* out of resources */ return NULL; } @@ -198,39 +197,33 @@ mca_common_sm_module_attach(opal_shmem_ds_t *seg_meta, size_t size_ctl_structure, size_t data_seg_alignment) { + mca_common_sm_module_t *map = NULL; + /* notice that size is 0 here. it really doesn't matter because size WILL * NOT be used because this is an attach (first_call is false). */ - return attach_and_init(seg_meta, 0, size_ctl_structure, - data_seg_alignment, false); + map = attach_and_init(seg_meta, 0, size_ctl_structure, + data_seg_alignment, false); + + return map; } /* ////////////////////////////////////////////////////////////////////////// */ -int -mca_common_sm_module_unlink(mca_common_sm_module_t *modp) +mca_common_sm_module_t * +mca_common_sm_init(ompi_proc_t **procs, + size_t num_procs, + size_t size, + char *file_name, + size_t size_ctl_structure, + size_t data_seg_alignment) { - if (NULL == modp) { - return OMPI_ERROR; - } - if (OPAL_SUCCESS != opal_shmem_unlink(&modp->shmem_ds)) { - return OMPI_ERROR; - } - return OMPI_SUCCESS; -} - -/* ////////////////////////////////////////////////////////////////////////// */ -int -mca_common_sm_local_proc_reorder(ompi_proc_t **procs, - size_t num_procs, - size_t *out_num_local_procs) -{ - size_t num_local_procs = 0; - bool found_lowest = false; + /* indicates whether or not i'm the lowest named process */ + bool lowest_local_proc = false; + mca_common_sm_module_t *map = NULL; ompi_proc_t *temp_proc = NULL; - size_t p; + bool found_lowest = false; + size_t num_local_procs = 0, p = 0; + opal_shmem_ds_t *seg_meta = NULL; - if (NULL == out_num_local_procs || NULL == procs) { - return OMPI_ERR_BAD_PARAM; - } /* o reorder procs array to have all the local procs at the beginning. * o look for the local proc with the lowest name. * o determine the number of local procs. @@ -247,7 +240,8 @@ mca_common_sm_local_proc_reorder(ompi_proc_t **procs, /* save this proc */ procs[num_local_procs] = procs[p]; /* if we have a new lowest, swap it with position 0 - * so that procs[0] is always the lowest named proc */ + * so that procs[0] is always the lowest named proc + */ if (OPAL_VALUE2_GREATER == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &(procs[p]->proc_name), @@ -263,31 +257,6 @@ mca_common_sm_local_proc_reorder(ompi_proc_t **procs, ++num_local_procs; } } - *out_num_local_procs = num_local_procs; - - return OMPI_SUCCESS; -} - -/* ////////////////////////////////////////////////////////////////////////// */ -mca_common_sm_module_t * -mca_common_sm_init(ompi_proc_t **procs, - size_t num_procs, - size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment) -{ - /* indicates whether or not i'm the lowest named process */ - bool lowest_local_proc = false; - mca_common_sm_module_t *map = NULL; - size_t num_local_procs = 0; - opal_shmem_ds_t *seg_meta = NULL; - - if (OMPI_SUCCESS != mca_common_sm_local_proc_reorder(procs, - num_procs, - &num_local_procs)) { - return NULL; - } /* if there is less than 2 local processes, there's nothing to do. */ if (num_local_procs < 2) { @@ -301,9 +270,9 @@ mca_common_sm_init(ompi_proc_t **procs, /* determine whether or not i am the lowest local process */ lowest_local_proc = - (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, - ORTE_PROC_MY_NAME, - &(procs[0]->proc_name))); + (0 == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, + ORTE_PROC_MY_NAME, + &(procs[0]->proc_name))); /* figure out if i am the lowest rank in the group. * if so, i will create the shared memory backing store @@ -465,3 +434,4 @@ mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module) } return rc; } + diff --git a/ompi/mca/common/sm/common_sm.h b/ompi/mca/common/sm/common_sm.h index c916cc603c..b8fd007e1e 100644 --- a/ompi/mca/common/sm/common_sm.h +++ b/ompi/mca/common/sm/common_sm.h @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -73,31 +73,18 @@ typedef struct mca_common_sm_module_t { OBJ_CLASS_DECLARATION(mca_common_sm_module_t); /** - * This routine reorders procs array to have all the local procs at the - * beginning and returns the number of local procs through out_num_local_procs. - * The proc with the lowest name is at the beginning of the reordered procs - * array. - * - * @returnvalue OMPI_SUCCESS on success, something else, otherwise. - */ -OMPI_DECLSPEC extern int -mca_common_sm_local_proc_reorder(ompi_proc_t **procs, - size_t num_procs, - size_t *out_num_local_procs); - -/** - * This routine is used to create and attach to a shared memory segment - * (whether it's an mmaped file or a SYSV IPC segment). It is assumed that + * This routine is used to create a shared memory segment (whether + * it's an mmaped file or a SYSV IPC segment). It is assumed that * the shared memory segment does not exist before this call. * * @returnvalue pointer to control structure at head of shared memory segment. * Returns NULL if an error occurred. */ -OMPI_DECLSPEC extern mca_common_sm_module_t * -mca_common_sm_module_create_and_attach(size_t size, - char *file_name, - size_t size_ctl_structure, - size_t data_seg_alignment); +mca_common_sm_module_t * +mca_common_sm_module_create(size_t size, + char *file_name, + size_t size_ctl_structure, + size_t data_seg_alignment); /** * This routine is used to attach to the shared memory segment associated with @@ -109,22 +96,11 @@ mca_common_sm_module_create_and_attach(size_t size, * @returnvalue pointer to control structure at head of shared memory segment. * Returns NULL if an error occurred. */ -OMPI_DECLSPEC extern mca_common_sm_module_t * +mca_common_sm_module_t * mca_common_sm_module_attach(opal_shmem_ds_t *seg_meta, size_t size_ctl_structure, size_t data_seg_alignment); -/** - * A thin wrapper around opal_shmem_unlink. - * - * @ modp points to an initialized mca_common_sm_module_t. - * - * @returnvalue OMPI_SUCCESS if the operation completed successfully, - * OMPI_ERROR otherwise. - */ -OMPI_DECLSPEC extern int -mca_common_sm_module_unlink(mca_common_sm_module_t *modp); - /** * This routine is used to set up a shared memory segment (whether * it's an mmaped file or a SYSV IPC segment). It is assumed that @@ -188,7 +164,7 @@ mca_common_sm_init_group(ompi_group_t *group, */ OMPI_DECLSPEC extern void * mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, - size_t *size, + size_t* size, mca_mpool_base_registration_t **registration); /** @@ -213,3 +189,4 @@ OMPI_DECLSPEC extern mca_common_sm_module_t *mca_common_sm_module; END_C_DECLS #endif /* _COMMON_SM_H_ */ + diff --git a/ompi/mca/mpool/sm/mpool_sm.h b/ompi/mca/mpool/sm/mpool_sm.h index 9666b3b63f..b46bc044d5 100644 --- a/ompi/mca/mpool/sm/mpool_sm.h +++ b/ompi/mca/mpool/sm/mpool_sm.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -28,7 +28,6 @@ #include "ompi_config.h" #include "opal/mca/event/event.h" -#include "opal/mca/shmem/shmem.h" #include "ompi/mca/common/sm/common_sm.h" #include "ompi/mca/mpool/mpool.h" @@ -37,19 +36,17 @@ BEGIN_C_DECLS struct mca_mpool_sm_component_t { - mca_mpool_base_component_t super; - /* mca_allocator_base_module_t* sm_allocator; */ - char *sm_allocator_name; - int verbose; - /* struct mca_mpool_sm_mmap_t *sm_mmap; */ + mca_mpool_base_component_t super; + /* mca_allocator_base_module_t* sm_allocator; */ + char* sm_allocator_name; + int verbose; + /* struct mca_mpool_sm_mmap_t *sm_mmap; */ }; typedef struct mca_mpool_sm_component_t mca_mpool_sm_component_t; typedef struct mca_mpool_base_resources_t { size_t size; int32_t mem_node; - /* backing store metadata */ - opal_shmem_ds_t bs_meta_buf; } mca_mpool_base_resources_t; OMPI_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component; @@ -57,7 +54,7 @@ OMPI_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component; typedef struct mca_mpool_sm_module_t { mca_mpool_base_module_t super; long sm_size; - mca_allocator_base_module_t *sm_allocator; + mca_allocator_base_module_t * sm_allocator; struct mca_mpool_sm_mmap_t *sm_mmap; mca_common_sm_module_t *sm_common_module; int32_t mem_node; diff --git a/ompi/mca/mpool/sm/mpool_sm_component.c b/ompi/mca/mpool/sm/mpool_sm_component.c index e5f9ff3994..bccaf78e38 100644 --- a/ompi/mca/mpool/sm/mpool_sm_component.c +++ b/ompi/mca/mpool/sm/mpool_sm_component.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010 Los Alamos National Security, LLC. * All rights reserved. * $COPYRIGHT$ * @@ -45,14 +45,10 @@ /* * Local functions */ -static int -mca_mpool_sm_open(void); - -static int -mca_mpool_sm_close(void); - -static mca_mpool_base_module_t * -mca_mpool_sm_init(struct mca_mpool_base_resources_t* resources); +static int mca_mpool_sm_open(void); +static int mca_mpool_sm_close( void ); +static mca_mpool_base_module_t* mca_mpool_sm_init( + struct mca_mpool_base_resources_t* resources); mca_mpool_sm_component_t mca_mpool_sm_component = { { @@ -94,8 +90,8 @@ static int mca_mpool_sm_open(void) /* register SM component parameters */ mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version, "allocator", - "Name of allocator component " - "to use with sm mpool", false, false, + "Name of allocator component to use with sm mpool", + false, false, "bucket", &mca_mpool_sm_component.sm_allocator_name); @@ -104,18 +100,18 @@ static int mca_mpool_sm_open(void) * to be set up to 2GB-1 for 32 bit and much greater for 64 bit. */ asprintf(&size_str, "%ld", default_min); mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version, - "min_size", - "Minimum size of the sm mpool shared memory file", - false, false, size_str, &min_size_param); + "min_size", + "Minimum size of the sm mpool shared memory file", + false, false, size_str, &min_size_param); free(size_str); mca_base_param_reg_int(&mca_mpool_sm_component.super.mpool_version, - "verbose", - "Enable verbose output for mpool sm component", - false, false, 0, &value); + "verbose", + "Enable verbose output for mpool sm component", + false, false, 0, &value); if (value != 0) { - mca_mpool_sm_component.verbose = opal_output_open(NULL); + mca_mpool_sm_component.verbose = opal_output_open(NULL); } else { - mca_mpool_sm_component.verbose = -1; + mca_mpool_sm_component.verbose = -1; } return OMPI_SUCCESS; @@ -132,44 +128,41 @@ static int mca_mpool_sm_close( void ) return OMPI_SUCCESS; } -static mca_mpool_base_module_t * -mca_mpool_sm_init(struct mca_mpool_base_resources_t *resources) +static mca_mpool_base_module_t* mca_mpool_sm_init( + struct mca_mpool_base_resources_t* resources) { - mca_mpool_sm_module_t *mpool_module; + char *file_name; + int len; + mca_mpool_sm_module_t* mpool_module; mca_allocator_base_component_t* allocator_component; long min_size; ompi_proc_t **procs; size_t num_all_procs, i, num_local_procs = 0; /* README: this needs to change if procs in different jobs (even - * spawned ones) are to talk using shared memory */ - if (NULL == (procs = ompi_proc_world(&num_all_procs))) { - /* out of resources, so just bail */ - return NULL; - } + spawned ones) are to talk using shared memory */ + procs = ompi_proc_world(&num_all_procs); for (i = 0 ; i < num_all_procs ; ++i) { if (OPAL_PROC_ON_LOCAL_NODE(procs[i]->proc_flags)) { num_local_procs++; } } + /* parse the min size and validate it */ - /* if other parameters are added, absolutely - * necessary to reset errno each time */ + /* if other parameters are added, absolutely necessary to reset errno each time */ errno = 0; min_size = strtol(min_size_param, (char **)NULL, 10); if (errno == ERANGE) { - opal_output(0, "mca_mpool_sm_init: min_size overflows! " - "set to default (%ld)", default_min); + opal_output(0, "mca_mpool_sm_init: min_size overflows! set to default (%ld)", default_min); min_size = default_min; } else if (errno == EINVAL) { - opal_output(0, "mca_mpool_sm_init: invalid min_size entered. " - "set it to (%ld)", default_min); + opal_output(0, "mca_mpool_sm_init: invalid min_size entered. set it to (%ld)", default_min); min_size = default_min; } /* Make a new mpool module */ mpool_module = - (mca_mpool_sm_module_t *)malloc(sizeof(mca_mpool_sm_module_t)); + (mca_mpool_sm_module_t*)malloc(sizeof(mca_mpool_sm_module_t)); mca_mpool_sm_module_init(mpool_module); /* set sm_size */ @@ -180,26 +173,23 @@ mca_mpool_sm_init(struct mca_mpool_base_resources_t *resources) mpool_module->sm_size = min_size; } + /* add something for the control structure */ + mpool_module->sm_size += sizeof(mca_common_sm_module_t); + allocator_component = mca_allocator_component_lookup( mca_mpool_sm_component.sm_allocator_name); /* if specified allocator cannot be loaded - look for an alternative */ - if (NULL == allocator_component) { - if (opal_list_get_size(&mca_allocator_base_components) == 0) { - mca_base_component_list_item_t *item = - (mca_base_component_list_item_t *) + if(NULL == allocator_component) { + if(opal_list_get_size(&mca_allocator_base_components) == 0) { + mca_base_component_list_item_t* item = (mca_base_component_list_item_t*) opal_list_get_first(&mca_allocator_base_components); - allocator_component = - (mca_allocator_base_component_t *)item->cli_component; - opal_output( - 0, "mca_mpool_sm_init: " - "unable to locate allocator: %s - using %s\n", - mca_mpool_sm_component.sm_allocator_name, - allocator_component->allocator_version.mca_component_name); + allocator_component = (mca_allocator_base_component_t*)item->cli_component; + opal_output(0, "mca_mpool_sm_init: unable to locate allocator: %s - using %s\n", + mca_mpool_sm_component.sm_allocator_name, allocator_component->allocator_version.mca_component_name); } else { - opal_output(0, "mca_mpool_sm_init: " - "unable to locate allocator: %s\n", - mca_mpool_sm_component.sm_allocator_name); + opal_output(0, "mca_mpool_sm_init: unable to locate allocator: %s\n", + mca_mpool_sm_component.sm_allocator_name); free(procs); return NULL; } @@ -207,28 +197,41 @@ mca_mpool_sm_init(struct mca_mpool_base_resources_t *resources) mpool_module->mem_node = resources->mem_node; + /* create initial shared memory mapping */ + len = asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s", + orte_process_info.job_session_dir, + orte_process_info.nodename ); + if ( 0 > len ) { + free(mpool_module); + free(procs); + return NULL; + } + opal_output(mca_mpool_sm_component.verbose, "mca_mpool_sm_init: shared memory size used: (%ld)", mpool_module->sm_size); - if (NULL == (mpool_module->sm_common_module = - mca_common_sm_module_attach(&resources->bs_meta_buf, + if (NULL == (mpool_module->sm_common_module = + mca_common_sm_init(procs, num_all_procs, + mpool_module->sm_size, + file_name, sizeof(mca_common_sm_module_t), 8))) { - opal_output(mca_mpool_sm_component.verbose, "mca_mpool_sm_init: " - "unable to create shared memory mapping (%s)", - resources->bs_meta_buf.seg_name); + opal_output(mca_mpool_sm_component.verbose, + "mca_mpool_sm_init: unable to create shared memory mapping (%s)", file_name); + free(file_name); free(mpool_module); free(procs); return NULL; } free(procs); + free(file_name); /* setup allocator */ mpool_module->sm_allocator = allocator_component->allocator_init(true, mca_common_sm_seg_alloc, NULL, &(mpool_module->super)); - if (NULL == mpool_module->sm_allocator) { + if(NULL == mpool_module->sm_allocator) { opal_output(0, "mca_mpool_sm_init: unable to initialize allocator"); free(mpool_module); return NULL; diff --git a/opal/mca/shmem/mmap/shmem_mmap_module.c b/opal/mca/shmem/mmap/shmem_mmap_module.c index 9fa4fe8cf4..f6fb514657 100644 --- a/opal/mca/shmem/mmap/shmem_mmap_module.c +++ b/opal/mca/shmem/mmap/shmem_mmap_module.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * * $COPYRIGHT$ @@ -115,12 +115,13 @@ opal_shmem_mmap_module_t opal_shmem_mmap_module = { static inline void shmem_ds_reset(opal_shmem_ds_t *ds_buf) { - /* don't print ds_buf info here, as we may be printing garbage. */ OPAL_OUTPUT_VERBOSE( (70, opal_shmem_base_output, - "%s: %s: shmem_ds_resetting\n", + "%s: %s: shmem_ds_resetting " + "(id: %d, size: %lu, name: %s)\n", mca_shmem_mmap_component.super.base_version.mca_type_name, - mca_shmem_mmap_component.super.base_version.mca_component_name) + mca_shmem_mmap_component.super.base_version.mca_component_name, + ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ); ds_buf->seg_cpid = 0; diff --git a/opal/mca/shmem/posix/shmem_posix_module.c b/opal/mca/shmem/posix/shmem_posix_module.c index 0ef9cfe5a3..22789cafb0 100644 --- a/opal/mca/shmem/posix/shmem_posix_module.c +++ b/opal/mca/shmem/posix/shmem_posix_module.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * * $COPYRIGHT$ @@ -108,12 +108,13 @@ opal_shmem_posix_module_t opal_shmem_posix_module = { static inline void shmem_ds_reset(opal_shmem_ds_t *ds_buf) { - /* don't print ds_buf info here, as we may be printing garbage. */ OPAL_OUTPUT_VERBOSE( (70, opal_shmem_base_output, - "%s: %s: shmem_ds_resetting\n", + "%s: %s: shmem_ds_resetting " + "(id: %d, size: %lu, name: %s)\n", mca_shmem_posix_component.super.base_version.mca_type_name, - mca_shmem_posix_component.super.base_version.mca_component_name) + mca_shmem_posix_component.super.base_version.mca_component_name, + ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ); ds_buf->seg_cpid = 0; diff --git a/opal/mca/shmem/sysv/shmem_sysv_module.c b/opal/mca/shmem/sysv/shmem_sysv_module.c index 59cb59e0d9..c0d13f429f 100644 --- a/opal/mca/shmem/sysv/shmem_sysv_module.c +++ b/opal/mca/shmem/sysv/shmem_sysv_module.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * * $COPYRIGHT$ @@ -113,12 +113,13 @@ opal_shmem_sysv_module_t opal_shmem_sysv_module = { static inline void shmem_ds_reset(opal_shmem_ds_t *ds_buf) { - /* don't print ds_buf info here, as we may be printing garbage. */ OPAL_OUTPUT_VERBOSE( (70, opal_shmem_base_output, - "%s: %s: shmem_ds_resetting\n", + "%s: %s: shmem_ds_resetting " + "(id: %d, size: %lu, name: %s)\n", mca_shmem_sysv_component.super.base_version.mca_type_name, - mca_shmem_sysv_component.super.base_version.mca_component_name) + mca_shmem_sysv_component.super.base_version.mca_component_name, + ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name) ); ds_buf->seg_cpid = 0; diff --git a/opal/mca/shmem/windows/shmem_windows_module.c b/opal/mca/shmem/windows/shmem_windows_module.c index 5e55e78920..3947d07311 100644 --- a/opal/mca/shmem/windows/shmem_windows_module.c +++ b/opal/mca/shmem/windows/shmem_windows_module.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2010-2012 Los Alamos National Security, LLC. + * Copyright (c) 2010-2011 Los Alamos National Security, LLC. * All rights reserved. * * $COPYRIGHT$ @@ -107,12 +107,13 @@ opal_shmem_windows_module_t opal_shmem_windows_module = { static inline void shmem_ds_reset(opal_shmem_ds_t *ds_buf) { - /* don't print ds_buf info here, as we may be printing garbage. */ OPAL_OUTPUT_VERBOSE( (70, opal_shmem_base_output, - "%s: %s: shmem_ds_resetting\n", + "%s: %s: shmem_ds_resetting " + "(id: %d, size: %"PRIsize_t", name: %s)\n", mca_shmem_windows_component.super.base_version.mca_type_name, - mca_shmem_windows_component.super.base_version.mca_component_name) + mca_shmem_windows_component.super.base_version.mca_component_name, + ds_buf->seg_id, ds_buf->seg_size, ds_buf->seg_name) ); ds_buf->seg_cpid = 0;