1
1

Let's try this again: sm BTL initialization via modex.

This commit was SVN r26989.
Этот коммит содержится в:
Samuel Gutierrez 2012-08-10 20:12:36 +00:00
родитель 6a70063812
Коммит 159bd2e62e
12 изменённых файлов: 685 добавлений и 251 удалений

Просмотреть файл

@ -43,9 +43,12 @@
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/util/printf.h" #include "opal/util/printf.h"
#include "opal/mca/hwloc/base/base.h" #include "opal/mca/hwloc/base/base.h"
#include "opal/mca/shmem/base/base.h"
#include "opal/mca/shmem/shmem.h"
#include "orte/util/proc_info.h" #include "orte/util/proc_info.h"
#include "opal/datatype/opal_convertor.h" #include "opal/datatype/opal_convertor.h"
#include "ompi/class/ompi_free_list.h" #include "ompi/class/ompi_free_list.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/btl/btl.h" #include "ompi/mca/btl/btl.h"
#include "ompi/mca/mpool/base/base.h" #include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/sm/mpool_sm.h" #include "ompi/mca/mpool/sm/mpool_sm.h"
@ -111,7 +114,6 @@ mca_btl_sm_t mca_btl_sm = {
*/ */
#define OFFSET2ADDR(OFFSET, BASE) ((ptrdiff_t)(OFFSET) + (char*)(BASE)) #define OFFSET2ADDR(OFFSET, BASE) ((ptrdiff_t)(OFFSET) + (char*)(BASE))
static void *mpool_calloc(size_t nmemb, size_t size) static void *mpool_calloc(size_t nmemb, size_t size)
{ {
void *buf; void *buf;
@ -127,17 +129,83 @@ static void *mpool_calloc(size_t nmemb, size_t size)
return buf; return buf;
} }
/*
static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) * Returns a pointer to node rank zero. Returns NULL on error.
*/
static ompi_proc_t *
get_node_rank_zero_proc_ptr(ompi_proc_t **proc_world,
size_t proc_world_size)
{ {
size_t size, length, length_payload; size_t num_local_procs = 0;
char *sm_ctl_file;
if (NULL == proc_world) {
return NULL;
}
/* sort the procs list and get a pointer to the lowest node rank */
if (OMPI_SUCCESS != mca_common_sm_local_proc_reorder(proc_world,
proc_world_size,
&num_local_procs)) {
opal_output(0, "mca_common_sm_local_proc_reorder failure! "
"Cannot continue.\n");
return NULL;
}
return proc_world[0];
}
/*
 * Modex receive.
 *
 * Obtains the proc world, asks get_node_rank_zero_proc_ptr() for the proc
 * whose modex entry should be read, and receives the sm BTL modex published
 * under this component's btl_version.
 *
 * comp_ptr  - sm BTL component (its super.btl_version keys the modex entry).
 * out_modex - receives a pointer to the modex buffer. On success the caller
 *             owns that buffer and is responsible for freeing it.
 *
 * Returns OMPI_SUCCESS when a modex of exactly sizeof(mca_btl_sm_modex_t)
 * bytes was received; an error code otherwise. When the payload is missing or
 * has an unexpected size it is freed here and *out_modex is set to NULL.
 */
static inline int
recv_modex(mca_btl_sm_component_t *comp_ptr,
           mca_btl_sm_modex_t **out_modex)
{
    ompi_proc_t **proc_world = NULL;
    ompi_proc_t *proc_node_rank_zero = NULL;
    size_t proc_world_size = 0;
    size_t modex_size = 0;
    int rc = OMPI_SUCCESS;

    if (NULL == comp_ptr || NULL == out_modex) {
        return OMPI_ERROR;
    }
    /* never hand back a stale pointer on the early-failure paths */
    *out_modex = NULL;

    if (NULL == (proc_world = ompi_proc_world(&proc_world_size))) {
        opal_output(0, "ompi_proc_world failure! Cannot continue.\n");
        rc = OMPI_ERROR;
        goto out;
    }
    if (NULL == (proc_node_rank_zero =
                 get_node_rank_zero_proc_ptr(proc_world, proc_world_size))) {
        opal_output(0, "get_node_rank_zero_proc_ptr failure! "
                    "Cannot continue.\n");
        rc = OMPI_ERROR;
        goto out;
    }
    if (OMPI_SUCCESS != (rc =
        ompi_modex_recv(&comp_ptr->super.btl_version,
                        proc_node_rank_zero,
                        (void **)out_modex,
                        &modex_size))) {
        opal_output(0, "recv_modex: ompi_modex_recv failure!\n");
        /* rc is set */
        goto out;
    }
    /* the caller dereferences the modex unconditionally, so refuse anything
     * that is not exactly one mca_btl_sm_modex_t. */
    if (NULL == *out_modex || sizeof(**out_modex) != modex_size) {
        opal_output(0, "recv_modex: unexpected modex payload "
                    "(size %lu, expected %lu)\n",
                    (unsigned long)modex_size,
                    (unsigned long)sizeof(**out_modex));
        free(*out_modex);
        *out_modex = NULL;
        rc = OMPI_ERROR;
    }

out:
    /* only the array itself is released here, not the procs it points to */
    free(proc_world);
    return rc;
}
static int
sm_btl_first_time_init(mca_btl_sm_t *sm_btl,
int32_t my_smp_rank,
int n)
{
size_t length, length_payload;
sm_fifo_t *my_fifos; sm_fifo_t *my_fifos;
int my_mem_node, num_mem_nodes, i; int my_mem_node, num_mem_nodes, i, rc;
ompi_proc_t **procs; mca_mpool_base_resources_t *res = NULL;
size_t num_procs;
mca_mpool_base_resources_t res;
mca_btl_sm_component_t* m = &mca_btl_sm_component; mca_btl_sm_component_t* m = &mca_btl_sm_component;
mca_btl_sm_modex_t *modex = NULL;
/* Assume we don't have hwloc support and fill in dummy info */ /* Assume we don't have hwloc support and fill in dummy info */
mca_btl_sm_component.mem_node = my_mem_node = 0; mca_btl_sm_component.mem_node = my_mem_node = 0;
@ -190,50 +258,42 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
} }
#endif #endif
/* lookup shared memory pool */ if (NULL == (res = calloc(1, sizeof(*res)))) {
mca_btl_sm_component.sm_mpools = (mca_mpool_base_module_t **) calloc(num_mem_nodes,
sizeof(mca_mpool_base_module_t*));
/* Disable memory binding, because each MPI process will claim
pages in the mpool for their local NUMA node */
res.mem_node = -1;
/* determine how much memory to create */
/*
* This heuristic formula mostly says that we request memory for:
* - nfifos FIFOs, each comprising:
* . a sm_fifo_t structure
* . many pointers (fifo_size of them per FIFO)
* - eager fragments (2*n of them, allocated in sm_free_list_inc chunks)
* - max fragments (sm_free_list_num of them)
*
* On top of all that, we sprinkle in some number of
* "opal_cache_line_size" additions to account for some
* padding and edge effects that may lie in the allocator.
*/
res.size =
FIFO_MAP_NUM(n) * ( sizeof(sm_fifo_t) + sizeof(void *) * m->fifo_size + 4 * opal_cache_line_size )
+ ( 2 * n + m->sm_free_list_inc ) * ( m->eager_limit + 2 * opal_cache_line_size )
+ m->sm_free_list_num * ( m->max_frag_size + 2 * opal_cache_line_size );
/* before we multiply by n, make sure the result won't overflow */
/* Stick that little pad in, particularly since we'll eventually
* need a little extra space. E.g., in mca_mpool_sm_init() in
* mpool_sm_component.c when sizeof(mca_common_sm_module_t) is
* added.
*/
if ( ((double) res.size) * n > LONG_MAX - 4096 ) {
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
res.size *= n; /* everyone receive modex information. all but node rank zero attach to the
* segments stored within the modex. remember: node rank zero is already
* attached to sm_seg. */
if (OMPI_SUCCESS != (rc = recv_modex(m, &modex))) {
free(res);
return rc;
}
/* lookup shared memory pool */
mca_btl_sm_component.sm_mpools =
(mca_mpool_base_module_t **)calloc(num_mem_nodes,
sizeof(mca_mpool_base_module_t *));
/* Disable memory binding, because each MPI process will claim pages in the
* mpool for their local NUMA node */
res->mem_node = -1;
res->size = modex->mpool_res_size;
if (OPAL_SUCCESS !=
opal_shmem_ds_copy(&(modex->sm_mpool_meta_buf),
&(res->bs_meta_buf))) {
free(res);
free(modex);
return OMPI_ERROR;
}
/* now, create it */
mca_btl_sm_component.sm_mpools[0] = mca_btl_sm_component.sm_mpools[0] =
mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name, mca_mpool_base_module_create(mca_btl_sm_component.sm_mpool_name,
sm_btl, &res); sm_btl, res);
/* Sanity check to ensure that we found it */ /* Sanity check to ensure that we found it */
if (NULL == mca_btl_sm_component.sm_mpools[0]) { if (NULL == mca_btl_sm_component.sm_mpools[0]) {
return OMPI_ERR_OUT_OF_RESOURCE; free(res);
free(modex);
return OMPI_ERR_OUT_OF_RESOURCE;
} }
mca_btl_sm_component.sm_mpool = mca_btl_sm_component.sm_mpools[0]; mca_btl_sm_component.sm_mpool = mca_btl_sm_component.sm_mpools[0];
@ -245,37 +305,27 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
mca_btl_sm_component.sm_peers = (struct mca_btl_base_endpoint_t**) mca_btl_sm_component.sm_peers = (struct mca_btl_base_endpoint_t**)
calloc(n, sizeof(struct mca_btl_base_endpoint_t*)); calloc(n, sizeof(struct mca_btl_base_endpoint_t*));
if (NULL == mca_btl_sm_component.sm_peers) { if (NULL == mca_btl_sm_component.sm_peers) {
free(res);
free(modex);
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
if (0 != my_smp_rank) {
/* Allocate Shared Memory BTL process coordination if (NULL == (mca_btl_sm_component.sm_seg =
* data structure. This will reside in shared memory */ mca_common_sm_module_attach(&modex->sm_meta_buf,
sizeof(mca_common_sm_seg_header_t),
/* set file name */ opal_cache_line_size))) {
if (asprintf(&sm_ctl_file, "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s", /* don't have to detach here, because module_attach cleans up after
orte_process_info.job_session_dir, * itself on failure. */
orte_process_info.nodename) < 0) { opal_output(0, "sm_btl_first_time_init: "
return OMPI_ERR_OUT_OF_RESOURCE; "mca_common_sm_module_attach failure!\n");
free(modex);
free(res);
return OMPI_ERROR;
}
} }
/* it is now safe to free the modex and the mpool resources */
/* Pass in a data segment alignment of 0 to get no data free(modex);
segment (only the shared control structure) */ free(res);
size = sizeof(mca_common_sm_seg_header_t) +
n * (sizeof(sm_fifo_t*) + sizeof(char *) + sizeof(uint16_t)) + opal_cache_line_size;
procs = ompi_proc_world(&num_procs);
if (!(mca_btl_sm_component.sm_seg =
mca_common_sm_init(procs, num_procs, size, sm_ctl_file,
sizeof(mca_common_sm_seg_header_t),
opal_cache_line_size))) {
opal_output(0, "mca_btl_sm_add_procs: unable to create shared memory "
"BTL coordinating strucure :: size %lu \n",
(unsigned long)size);
free(procs);
free(sm_ctl_file);
return OMPI_ERROR;
}
free(procs);
free(sm_ctl_file);
/* check to make sure number of local procs is within the /* check to make sure number of local procs is within the
* specified limits */ * specified limits */
@ -374,6 +424,7 @@ static struct mca_btl_base_endpoint_t *
create_sm_endpoint(int local_proc, struct ompi_proc_t *proc) create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
{ {
struct mca_btl_base_endpoint_t *ep; struct mca_btl_base_endpoint_t *ep;
#if OMPI_ENABLE_PROGRESS_THREADS == 1 #if OMPI_ENABLE_PROGRESS_THREADS == 1
char path[PATH_MAX]; char path[PATH_MAX];
#endif #endif
@ -401,22 +452,6 @@ create_sm_endpoint(int local_proc, struct ompi_proc_t *proc)
return ep; return ep;
} }
/*
 * Fills in mca_btl_sm_component.sm_max_procs from the proc count n, but only
 * while that field is still negative:
 *   - sm_extra_procs >= 0 : sm_max_procs = n + sm_extra_procs
 *   - sm_extra_procs <  0 : sm_max_procs = 2 * n
 * A non-negative sm_max_procs is left untouched.
 */
static void calc_sm_max_procs(int n)
{
    /* already non-negative: nothing to compute */
    if (mca_btl_sm_component.sm_max_procs >= 0) {
        return;
    }

    if (mca_btl_sm_component.sm_extra_procs < 0) {
        /* no extra-proc count available: reserve room for twice n */
        mca_btl_sm_component.sm_max_procs = 2 * n;
        return;
    }

    /* room for n procs plus the requested number of extra procs */
    mca_btl_sm_component.sm_max_procs =
        n + mca_btl_sm_component.sm_extra_procs;
}
int mca_btl_sm_add_procs( int mca_btl_sm_add_procs(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
size_t nprocs, size_t nprocs,
@ -430,6 +465,9 @@ int mca_btl_sm_add_procs(
mca_btl_sm_t *sm_btl; mca_btl_sm_t *sm_btl;
bool have_connected_peer = false; bool have_connected_peer = false;
char **bases; char **bases;
/* for easy access to the mpool_sm_module */
mca_mpool_sm_module_t *sm_mpool_modp = NULL;
/* initialization */ /* initialization */
sm_btl = (mca_btl_sm_t *)btl; sm_btl = (mca_btl_sm_t *)btl;
@ -442,7 +480,7 @@ int mca_btl_sm_add_procs(
* and identify procs that are on this host. Add procs on this * and identify procs that are on this host. Add procs on this
* host to shared memory reachability list. Also, get number * host to shared memory reachability list. Also, get number
* of local procs in the procs list. */ * of local procs in the procs list. */
for(proc = 0; proc < (int32_t)nprocs; proc++) { for (proc = 0; proc < (int32_t)nprocs; proc++) {
/* check to see if this proc can be reached via shmem (i.e., /* check to see if this proc can be reached via shmem (i.e.,
if they're on my local host and in my job) */ if they're on my local host and in my job) */
if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid || if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ||
@ -477,18 +515,18 @@ int mca_btl_sm_add_procs(
goto CLEANUP; goto CLEANUP;
/* make sure that my_smp_rank has been defined */ /* make sure that my_smp_rank has been defined */
if(-1 == my_smp_rank) { if (-1 == my_smp_rank) {
return_code = OMPI_ERROR; return_code = OMPI_ERROR;
goto CLEANUP; goto CLEANUP;
} }
calc_sm_max_procs(n_local_procs);
if (!sm_btl->btl_inited) { if (!sm_btl->btl_inited) {
return_code = return_code =
sm_btl_first_time_init(sm_btl, mca_btl_sm_component.sm_max_procs); sm_btl_first_time_init(sm_btl, my_smp_rank,
if(return_code != OMPI_SUCCESS) mca_btl_sm_component.sm_max_procs);
if (return_code != OMPI_SUCCESS) {
goto CLEANUP; goto CLEANUP;
}
} }
/* set local proc's smp rank in the peers structure for /* set local proc's smp rank in the peers structure for
@ -501,6 +539,7 @@ int mca_btl_sm_add_procs(
} }
bases = mca_btl_sm_component.shm_bases; bases = mca_btl_sm_component.shm_bases;
sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_sm_component.sm_mpool;
/* initialize own FIFOs */ /* initialize own FIFOs */
/* /*
@ -524,13 +563,34 @@ int mca_btl_sm_add_procs(
/* Sync with other local procs. Force the FIFO initialization to always /* Sync with other local procs. Force the FIFO initialization to always
* happen before the readers access it. * happen before the readers access it.
*/ */
opal_atomic_add_32( &mca_btl_sm_component.sm_seg->module_seg->seg_inited, 1); opal_atomic_add_32(&mca_btl_sm_component.sm_seg->module_seg->seg_inited, 1);
while( n_local_procs > while( n_local_procs >
mca_btl_sm_component.sm_seg->module_seg->seg_inited) { mca_btl_sm_component.sm_seg->module_seg->seg_inited) {
opal_progress(); opal_progress();
opal_atomic_rmb(); opal_atomic_rmb();
} }
/* it is now safe to unlink the shared memory segment. only one process
* needs to do this, so just let smp rank zero take care of it. */
if (0 == my_smp_rank) {
if (OMPI_SUCCESS !=
mca_common_sm_module_unlink(mca_btl_sm_component.sm_seg)) {
/* it is "okay" if this fails at this point. we have gone this far,
* so just warn about the failure and continue. this is probably
* only triggered by a programming error. */
opal_output(0, "WARNING: common_sm_module_unlink failed.\n");
}
/* SKG - another abstraction violation here, but I don't want to add
* extra code in the sm mpool for further synchronization. */
/* at this point, all processes have attached to the mpool segment. so
* it is safe to unlink it here. */
if (OMPI_SUCCESS !=
mca_common_sm_module_unlink(sm_mpool_modp->sm_common_module)) {
opal_output(0, "WARNING: common_sm_module_unlink failed.\n");
}
}
/* coordinate with other processes */ /* coordinate with other processes */
for(j = mca_btl_sm_component.num_smp_procs; for(j = mca_btl_sm_component.num_smp_procs;
j < mca_btl_sm_component.num_smp_procs + n_local_procs; j++) { j < mca_btl_sm_component.num_smp_procs + n_local_procs; j++) {

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@ -42,6 +42,8 @@
#include "opal/util/bit_ops.h" #include "opal/util/bit_ops.h"
#include "opal/class/opal_free_list.h" #include "opal/class/opal_free_list.h"
#include "opal/mca/shmem/shmem.h"
#include "ompi/mca/btl/btl.h" #include "ompi/mca/btl/btl.h"
#include "ompi/mca/common/sm/common_sm.h" #include "ompi/mca/common/sm/common_sm.h"
@ -121,6 +123,16 @@ typedef struct mca_btl_sm_mem_node_t {
mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */ mca_mpool_base_module_t* sm_mpool; /**< shared memory pool */
} mca_btl_sm_mem_node_t; } mca_btl_sm_mem_node_t;
/**
 * Shared Memory (SM) BTL modex.
 *
 * Payload that send_modex() publishes with ompi_modex_send() as
 * sizeof(mca_btl_sm_modex_t) raw bytes and that recv_modex() reads back.
 * It is filled in by populate_modex_bufp().
 */
struct mca_btl_sm_modex_t {
/* shmem descriptor of the sm BTL segment (copied from
 * comp_ptr->sm_seg->shmem_ds); used to attach to that segment */
opal_shmem_ds_t sm_meta_buf;
/* shmem descriptor of the segment created for the sm mpool; copied into the
 * mpool resources (bs_meta_buf) before the mpool module is created */
opal_shmem_ds_t sm_mpool_meta_buf;
/* size, in bytes, that the sm mpool segment was created with; becomes the
 * mpool resources' size on the receiving side */
size_t mpool_res_size;
};
typedef struct mca_btl_sm_modex_t mca_btl_sm_modex_t;
/** /**
* Shared Memory (SM) BTL module. * Shared Memory (SM) BTL module.
*/ */

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2006-2007 Voltaire. All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2010-2012 IBM Corporation. All rights reserved. * Copyright (c) 2010-2012 IBM Corporation. All rights reserved.
@ -42,15 +42,20 @@
#include <sys/stat.h> /* for mkfifo */ #include <sys/stat.h> /* for mkfifo */
#endif /* HAVE_SYS_STAT_H */ #endif /* HAVE_SYS_STAT_H */
#include "ompi/constants.h"
#include "opal/mca/event/event.h" #include "opal/mca/event/event.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/shmem/base/base.h"
#include "opal/mca/shmem/shmem.h"
#include "opal/util/bit_ops.h" #include "opal/util/bit_ops.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "orte/util/proc_info.h" #include "orte/util/proc_info.h"
#include "orte/util/show_help.h" #include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_globals.h"
#include "orte/util/proc_info.h"
#include "opal/mca/base/mca_base_param.h" #include "ompi/constants.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/mpool/base/base.h" #include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/common/sm/common_sm.h" #include "ompi/mca/common/sm/common_sm.h"
#include "ompi/mca/btl/base/btl_base_error.h" #include "ompi/mca/btl/base/btl_base_error.h"
@ -351,52 +356,354 @@ CLEANUP:
return return_value; return return_value;
} }
/*
 * Reports how many processes run on this node, the caller included.
 */
static inline int
get_num_local_procs(void)
{
    /* orte_process_info.num_local_peers leaves this process out of its
     * count, so account for ourselves explicitly. */
    const int self = 1;

    return (int)(self + orte_process_info.num_local_peers);
}
/*
 * Sets mca_btl_sm_component.sm_max_procs when it is still negative.
 *
 * With a non-negative sm_extra_procs the result is n + sm_extra_procs;
 * otherwise it is 2 * n. If sm_max_procs is already non-negative this
 * routine does nothing.
 */
static void
calc_sm_max_procs(int n)
{
    if (mca_btl_sm_component.sm_max_procs >= 0) {
        /* a usable value is already in place */
        return;
    }

    mca_btl_sm_component.sm_max_procs =
        (mca_btl_sm_component.sm_extra_procs < 0)
            ? 2 * n
            : n + mca_btl_sm_component.sm_extra_procs;
}
/*
 * Creates a shared-memory segment and attaches to it by forwarding to
 * mca_common_sm_module_create_and_attach().
 *
 * comp_ptr            - not used by this routine.
 * size                - segment size, in bytes.
 * file_name           - path handed to the common sm layer.
 * size_ctl_structure  - forwarded unchanged.
 * data_seg_alignment  - forwarded unchanged.
 * out_modp            - receives the module pointer (NULL on failure).
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR (after logging) when no module could
 * be created.
 */
static int
create_and_attach(mca_btl_sm_component_t *comp_ptr,
                  size_t size,
                  char *file_name,
                  size_t size_ctl_structure,
                  size_t data_seg_alignment,
                  mca_common_sm_module_t **out_modp)
{
    mca_common_sm_module_t *created =
        mca_common_sm_module_create_and_attach(size, file_name,
                                               size_ctl_structure,
                                               data_seg_alignment);

    /* publish the result unconditionally, exactly like a NULL on failure */
    *out_modp = created;
    if (NULL != created) {
        return OMPI_SUCCESS;
    }

    opal_output(0, "create_and_attach: unable to create shared memory "
                "BTL coordinating strucure :: size %lu \n",
                (unsigned long)size);
    return OMPI_ERROR;
}
/*
 * SKG - I'm not happy with this, but I can't figure out a better way of
 * finding the sm mpool's minimum size 8-|. The way I see it, this BTL only
 * uses the sm mpool, so maybe this isn't so bad...
 *
 * The problem is that we need to size the mpool resources at sm BTL component
 * init. That means we need to know the mpool's minimum size at create.
 *
 * Looks up the MCA parameter mpool_<sm_mpool_name>_min_size as a string and
 * converts it with strtol().
 *
 * comp_ptr - sm BTL component; supplies sm_mpool_name.
 * out_size - receives the minimum size in bytes. When the parameter value is
 *            missing, non-positive or out of range, a warning is logged and
 *            the built-in default (67108864) is returned instead.
 *
 * Returns OMPI_SUCCESS, OMPI_ERR_NOT_FOUND when the parameter is not
 * registered, or OMPI_ERROR when the lookup itself fails.
 */
static int
get_min_mpool_size(mca_btl_sm_component_t *comp_ptr,
                   size_t *out_size)
{
    char *type_name = "mpool";
    char *param_name = "min_size";
    char *min_size = NULL;
    int id = 0;
    size_t default_min = 67108864;
    size_t size = 0;
    long tmp_size = 0;

    if (0 > (id = mca_base_param_find(type_name, comp_ptr->sm_mpool_name,
                                      param_name))) {
        opal_output(0, "mca_base_param_find: failure looking for %s_%s_%s\n",
                    type_name, comp_ptr->sm_mpool_name, param_name);
        return OMPI_ERR_NOT_FOUND;
    }
    if (OPAL_ERROR == mca_base_param_lookup_string(id, &min_size)) {
        opal_output(0, "mca_base_param_lookup_string failure\n");
        return OMPI_ERROR;
    }

    errno = 0;
    /* the lookup may leave min_size NULL (e.g. no value stored); handing
     * NULL to strtol is undefined, so treat it like an unusable value:
     * tmp_size stays 0 and the default is used below. */
    if (NULL != min_size) {
        tmp_size = strtol(min_size, (char **)NULL, 10);
    }
    if (ERANGE == errno || EINVAL == errno || tmp_size <= 0) {
        opal_output(0, "mca_btl_sm::get_min_mpool_size: "
                    "Unusable %s_%s_min_size provided. "
                    "Continuing with %lu.", type_name,
                    comp_ptr->sm_mpool_name,
                    (unsigned long)default_min);
        size = default_min;
    }
    else {
        size = (size_t)tmp_size;
    }

    /* the lookup hands us our own copy of the string (free(NULL) is fine) */
    free(min_size);
    *out_size = size;
    return OMPI_SUCCESS;
}
/*
 * Works out how many bytes to request for the sm mpool segment.
 *
 * The per-process estimate is a heuristic made of three parts plus the size
 * of the common sm control structure:
 *   - FIFO_MAP_NUM(max_procs) FIFOs, each a sm_fifo_t and fifo_size pointers
 *   - eager fragments: 2 * max_procs of them plus one sm_free_list_inc chunk
 *   - max fragments: sm_free_list_num of them
 * Every part carries a few extra opal_cache_line_size units to absorb
 * padding and edge effects in the allocator.
 *
 * max_procs    - number of procs to size the segment for.
 * out_res_size - receives the per-process estimate multiplied by max_procs.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_VALUE_OUT_OF_BOUNDS when the product
 * would not fit below LONG_MAX - 4096 (in which case *out_res_size is not
 * written).
 */
static int
get_mpool_res_size(int32_t max_procs,
                   size_t *out_res_size)
{
    /* FIFOs and their pointer arrays */
    const size_t fifo_bytes =
        FIFO_MAP_NUM(max_procs) *
        (sizeof(sm_fifo_t) + sizeof(void *) *
         mca_btl_sm_component.fifo_size + 4 * opal_cache_line_size);
    /* eager fragments */
    const size_t eager_bytes =
        (2 * max_procs + mca_btl_sm_component.sm_free_list_inc) *
        (mca_btl_sm_component.eager_limit + 2 * opal_cache_line_size);
    /* max-size fragments */
    const size_t max_frag_bytes =
        mca_btl_sm_component.sm_free_list_num *
        (mca_btl_sm_component.max_frag_size + 2 * opal_cache_line_size);

    size_t per_proc = fifo_bytes + eager_bytes + max_frag_bytes;

    /* room for the control structure */
    per_proc += sizeof(mca_common_sm_module_t);

    /* Refuse sizes whose product with max_procs would overflow. The 4096
     * bytes of slack leave space for small additions made later on top of
     * this size. */
    if (((double)per_proc) * max_procs > LONG_MAX - 4096) {
        return OMPI_ERR_VALUE_OUT_OF_BOUNDS;
    }

    *out_res_size = per_proc * (size_t)max_procs;
    return OMPI_SUCCESS;
}
/*
 * Creates the shared-memory segments required for this BTL -- one for the sm
 * mpool and another for the BTL's shared control data -- and populates
 * *modex_buf_ptr with their shmem descriptors and the mpool segment size.
 *
 * It is assumed that calc_sm_max_procs has already been called (sets
 * sm_max_procs).
 *
 * comp_ptr      - sm BTL component; sm_max_procs and sm_mpool_name are read,
 *                 sm_seg is set to the newly created BTL segment.
 * modex_buf_ptr - modex buffer to fill in.
 *
 * Returns OMPI_SUCCESS, or the error code of the first step that failed.
 */
static int
populate_modex_bufp(mca_btl_sm_component_t *comp_ptr,
                    mca_btl_sm_modex_t *modex_buf_ptr)
{
    int rc = OMPI_SUCCESS;
    size_t size = 0;
    size_t min_size = 0;
    char *sm_mpool_ctl_file = NULL;
    char *sm_ctl_file = NULL;
    /* used as a temporary store so we can extract shmem_ds info.
     * NOTE(review): this module is not released in this function; whether it
     * is meant to stay attached is not visible from here -- confirm. */
    mca_common_sm_module_t *tmp_modp = NULL;

    /* first generate some unique paths for the shared-memory segments that
     * this BTL needs. */
    if (asprintf(&sm_mpool_ctl_file,
                 "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
                 orte_process_info.job_session_dir,
                 orte_process_info.nodename) < 0) {
        /* asprintf may leave its output pointer in an unspecified state on
         * failure; reset it so the cleanup below never frees garbage. */
        sm_mpool_ctl_file = NULL;
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto out;
    }
    if (asprintf(&sm_ctl_file,
                 "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",
                 orte_process_info.job_session_dir,
                 orte_process_info.nodename) < 0) {
        /* same reasoning as above */
        sm_ctl_file = NULL;
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto out;
    }

    /* create the things */

    /* === sm mpool == */
    /* get the segment size for the sm mpool. */
    if (OMPI_SUCCESS != (rc = get_mpool_res_size(comp_ptr->sm_max_procs,
                                                 &size))) {
        /* rc is already set */
        goto out;
    }
    /* do we need to update the size based on the sm mpool's min size? */
    if (OMPI_SUCCESS != (rc = get_min_mpool_size(comp_ptr, &min_size))) {
        goto out;
    }
    if (size < min_size) {
        size = min_size;
    }
    /* we only need the shmem_ds info at this point. initialization will be
     * completed in the mpool module code. the idea is that we just need this
     * info so we can populate the modex. */
    if (OMPI_SUCCESS != (rc =
        create_and_attach(comp_ptr, size, sm_mpool_ctl_file,
                          sizeof(mca_common_sm_module_t), 8, &tmp_modp))) {
        /* rc is set */
        goto out;
    }
    /* now extract and store the shmem_ds info from the returned module */
    if (OPAL_SUCCESS !=
        opal_shmem_ds_copy(&(tmp_modp->shmem_ds),
                           &(modex_buf_ptr->sm_mpool_meta_buf))) {
        rc = OMPI_ERROR;
        goto out;
    }
    /* set the mpool_res_size in the modex */
    modex_buf_ptr->mpool_res_size = size;

    /* === sm btl == */
    /* calculate the segment size. */
    size = sizeof(mca_common_sm_seg_header_t) +
           comp_ptr->sm_max_procs *
           (sizeof(sm_fifo_t *) +
            sizeof(char *) + sizeof(uint16_t)) +
           opal_cache_line_size;
    if (OMPI_SUCCESS != (rc =
        create_and_attach(comp_ptr, size, sm_ctl_file,
                          sizeof(mca_common_sm_seg_header_t),
                          opal_cache_line_size, &comp_ptr->sm_seg))) {
        /* rc is set */
        goto out;
    }
    /* now extract and store the shmem_ds info from the returned module */
    if (OPAL_SUCCESS != opal_shmem_ds_copy(&(comp_ptr->sm_seg->shmem_ds),
                                           &(modex_buf_ptr->sm_meta_buf))) {
        rc = OMPI_ERROR;
        goto out;
    }

out:
    /* the path strings are only needed while creating the segments */
    free(sm_mpool_ctl_file);
    free(sm_ctl_file);
    return rc;
}
/*
 * Builds the sm modex payload (via populate_modex_bufp) and publishes it with
 * ompi_modex_send under this component's btl_version.
 *
 * Returns OMPI_ERR_OUT_OF_RESOURCE when the payload cannot be allocated, the
 * error from populate_modex_bufp when populating fails, and otherwise
 * whatever ompi_modex_send returns.
 */
static int
send_modex(mca_btl_sm_component_t *comp_ptr)
{
    int status;
    mca_btl_sm_modex_t *payload = calloc(1, sizeof(*payload));

    if (NULL == payload) {
        /* out of resources, so just bail. */
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    status = populate_modex_bufp(comp_ptr, payload);
    if (OMPI_SUCCESS == status) {
        /* send the modex */
        status = ompi_modex_send(&comp_ptr->super.btl_version, payload,
                                 sizeof(*payload));
    } else {
        opal_output(0, "send_modex: populate_modex_bufp failure!\n");
    }

    /* the local copy is released whether or not the send succeeded */
    free(payload);
    return status;
}
/* /*
* SM component initialization * SM component initialization
*/ */
static mca_btl_base_module_t** mca_btl_sm_component_init( static mca_btl_base_module_t **
int *num_btls, mca_btl_sm_component_init(int *num_btls,
bool enable_progress_threads, bool enable_progress_threads,
bool enable_mpi_threads) bool enable_mpi_threads)
{ {
int num_local_procs = 0;
mca_btl_base_module_t **btls = NULL; mca_btl_base_module_t **btls = NULL;
orte_node_rank_t my_node_rank = ORTE_NODE_RANK_INVALID;
#if OMPI_BTL_SM_HAVE_KNEM #if OMPI_BTL_SM_HAVE_KNEM
int rc; int rc;
#endif #endif
*num_btls = 0; *num_btls = 0;
/* if no session directory was created, then we cannot be used */
if (!orte_create_session_dirs) {
return NULL;
}
/* lookup/create shared memory pool only when used */ /* lookup/create shared memory pool only when used */
mca_btl_sm_component.sm_mpool = NULL; mca_btl_sm_component.sm_mpool = NULL;
mca_btl_sm_component.sm_mpool_base = NULL; mca_btl_sm_component.sm_mpool_base = NULL;
#if OMPI_ENABLE_PROGRESS_THREADS == 1 /* if no session directory was created, then we cannot be used */
/* create a named pipe to receive events */ /* SKG - this isn't true anymore. Some backing facilities don't require a
sprintf( mca_btl_sm_component.sm_fifo_path, * file-backed store. Extend shmem to provide this info one day. */
"%s"OPAL_PATH_SEP"sm_fifo.%lu", orte_process_info.job_session_dir, if (!orte_create_session_dirs) {
(unsigned long)ORTE_PROC_MY_NAME->vpid );
if(mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) {
opal_output(0, "mca_btl_sm_component_init: mkfifo failed with errno=%d\n",errno);
return NULL; return NULL;
} }
mca_btl_sm_component.sm_fifo_fd = open(mca_btl_sm_component.sm_fifo_path, O_RDWR); /* if we don't have locality information, then we cannot be used */
if (ORTE_NODE_RANK_INVALID ==
(my_node_rank = orte_process_info.my_node_rank)) {
orte_show_help("help-mpi-btl-sm.txt", "no locality", true);
return NULL;
}
/* no use trying to use sm with less than two procs, so just bail. */
if ((num_local_procs = get_num_local_procs()) < 2) {
return NULL;
}
/* calculate max procs so we can figure out how large to make the
* shared-memory segment. this routine sets component sm_max_procs. */
calc_sm_max_procs(num_local_procs);
/* let local rank 0 create the shared-memory segments and send shmem info */
if (0 == my_node_rank) {
if (OMPI_SUCCESS != send_modex(&mca_btl_sm_component)) {
return NULL;
}
}
#if OMPI_ENABLE_PROGRESS_THREADS == 1
/* create a named pipe to receive events */
sprintf(mca_btl_sm_component.sm_fifo_path,
"%s"OPAL_PATH_SEP"sm_fifo.%lu",
orte_process_info.job_session_dir,
(unsigned long)ORTE_PROC_MY_NAME->vpid);
if (mkfifo(mca_btl_sm_component.sm_fifo_path, 0660) < 0) {
opal_output(0, "mca_btl_sm_component_init: "
"mkfifo failed with errno=%d\n",errno);
return NULL;
}
mca_btl_sm_component.sm_fifo_fd = open(mca_btl_sm_component.sm_fifo_path,
O_RDWR);
if(mca_btl_sm_component.sm_fifo_fd < 0) { if(mca_btl_sm_component.sm_fifo_fd < 0) {
opal_output(0, "mca_btl_sm_component_init: open(%s) failed with errno=%d\n", opal_output(0, "mca_btl_sm_component_init: "
"open(%s) failed with errno=%d\n",
mca_btl_sm_component.sm_fifo_path, errno); mca_btl_sm_component.sm_fifo_path, errno);
return NULL; return NULL;
} }
OBJ_CONSTRUCT(&mca_btl_sm_component.sm_fifo_thread, opal_thread_t); OBJ_CONSTRUCT(&mca_btl_sm_component.sm_fifo_thread, opal_thread_t);
mca_btl_sm_component.sm_fifo_thread.t_run = (opal_thread_fn_t) mca_btl_sm_component_event_thread; mca_btl_sm_component.sm_fifo_thread.t_run =
(opal_thread_fn_t)mca_btl_sm_component_event_thread;
opal_thread_start(&mca_btl_sm_component.sm_fifo_thread); opal_thread_start(&mca_btl_sm_component.sm_fifo_thread);
#endif #endif
mca_btl_sm_component.sm_btls = (mca_btl_sm_t **) malloc( mca_btl_sm_component.sm_max_btls * sizeof (mca_btl_sm_t *)); mca_btl_sm_component.sm_btls =
(mca_btl_sm_t **)malloc(mca_btl_sm_component.sm_max_btls *
sizeof(mca_btl_sm_t *));
if (NULL == mca_btl_sm_component.sm_btls) { if (NULL == mca_btl_sm_component.sm_btls) {
return NULL; return NULL;
} }

Просмотреть файл

@ -4,6 +4,8 @@
# of Tennessee Research Foundation. All rights # of Tennessee Research Foundation. All rights
# reserved. # reserved.
# Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -12,6 +14,10 @@
# #
# This is the US/English help file for Open MPI's shared memory support. # This is the US/English help file for Open MPI's shared memory support.
# #
[no locality]
WARNING: Missing locality information required for sm initialization.
Continuing without shared memory support.
#
[knem requested but not supported] [knem requested but not supported]
WARNING: Linux kernel knem support was requested for the shared memory WARNING: Linux kernel knem support was requested for the shared memory
(sm) BTL, but it is not supported. Deactivating the shared memory (sm) BTL, but it is not supported. Deactivating the shared memory

Просмотреть файл

@ -42,6 +42,7 @@
#include "opal/align.h" #include "opal/align.h"
#include "opal/util/argv.h" #include "opal/util/argv.h"
#include "opal/mca/shmem/shmem.h"
#if OPAL_ENABLE_FT_CR == 1 #if OPAL_ENABLE_FT_CR == 1
#include "opal/runtime/opal_cr.h" #include "opal/runtime/opal_cr.h"
#endif #endif
@ -162,15 +163,15 @@ attach_and_init(opal_shmem_ds_t *shmem_bufp,
/* ////////////////////////////////////////////////////////////////////////// */ /* ////////////////////////////////////////////////////////////////////////// */
mca_common_sm_module_t * mca_common_sm_module_t *
mca_common_sm_module_create(size_t size, mca_common_sm_module_create_and_attach(size_t size,
char *file_name, char *file_name,
size_t size_ctl_structure, size_t size_ctl_structure,
size_t data_seg_alignment) size_t data_seg_alignment)
{ {
mca_common_sm_module_t *map = NULL; mca_common_sm_module_t *map = NULL;
opal_shmem_ds_t *seg_meta = NULL; opal_shmem_ds_t *seg_meta = NULL;
if (NULL == (seg_meta = (opal_shmem_ds_t *) malloc(sizeof(*seg_meta)))) { if (NULL == (seg_meta = (opal_shmem_ds_t *)malloc(sizeof(*seg_meta)))) {
/* out of resources */ /* out of resources */
return NULL; return NULL;
} }
@ -197,33 +198,39 @@ mca_common_sm_module_attach(opal_shmem_ds_t *seg_meta,
size_t size_ctl_structure, size_t size_ctl_structure,
size_t data_seg_alignment) size_t data_seg_alignment)
{ {
mca_common_sm_module_t *map = NULL;
/* notice that size is 0 here. it really doesn't matter because size WILL /* notice that size is 0 here. it really doesn't matter because size WILL
* NOT be used because this is an attach (first_call is false). */ * NOT be used because this is an attach (first_call is false). */
map = attach_and_init(seg_meta, 0, size_ctl_structure, return attach_and_init(seg_meta, 0, size_ctl_structure,
data_seg_alignment, false); data_seg_alignment, false);
return map;
} }
/* ////////////////////////////////////////////////////////////////////////// */ /* ////////////////////////////////////////////////////////////////////////// */
mca_common_sm_module_t * int
mca_common_sm_init(ompi_proc_t **procs, mca_common_sm_module_unlink(mca_common_sm_module_t *modp)
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
{ {
/* indicates whether or not i'm the lowest named process */ if (NULL == modp) {
bool lowest_local_proc = false; return OMPI_ERROR;
mca_common_sm_module_t *map = NULL; }
ompi_proc_t *temp_proc = NULL; if (OPAL_SUCCESS != opal_shmem_unlink(&modp->shmem_ds)) {
bool found_lowest = false; return OMPI_ERROR;
size_t num_local_procs = 0, p = 0; }
opal_shmem_ds_t *seg_meta = NULL; return OMPI_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
int
mca_common_sm_local_proc_reorder(ompi_proc_t **procs,
size_t num_procs,
size_t *out_num_local_procs)
{
size_t num_local_procs = 0;
bool found_lowest = false;
ompi_proc_t *temp_proc = NULL;
size_t p;
if (NULL == out_num_local_procs || NULL == procs) {
return OMPI_ERR_BAD_PARAM;
}
/* o reorder procs array to have all the local procs at the beginning. /* o reorder procs array to have all the local procs at the beginning.
* o look for the local proc with the lowest name. * o look for the local proc with the lowest name.
* o determine the number of local procs. * o determine the number of local procs.
@ -240,8 +247,7 @@ mca_common_sm_init(ompi_proc_t **procs,
/* save this proc */ /* save this proc */
procs[num_local_procs] = procs[p]; procs[num_local_procs] = procs[p];
/* if we have a new lowest, swap it with position 0 /* if we have a new lowest, swap it with position 0
* so that procs[0] is always the lowest named proc * so that procs[0] is always the lowest named proc */
*/
if (OPAL_VALUE2_GREATER == if (OPAL_VALUE2_GREATER ==
orte_util_compare_name_fields(ORTE_NS_CMP_ALL, orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
&(procs[p]->proc_name), &(procs[p]->proc_name),
@ -257,6 +263,31 @@ mca_common_sm_init(ompi_proc_t **procs,
++num_local_procs; ++num_local_procs;
} }
} }
*out_num_local_procs = num_local_procs;
return OMPI_SUCCESS;
}
/* ////////////////////////////////////////////////////////////////////////// */
mca_common_sm_module_t *
mca_common_sm_init(ompi_proc_t **procs,
size_t num_procs,
size_t size,
char *file_name,
size_t size_ctl_structure,
size_t data_seg_alignment)
{
/* indicates whether or not i'm the lowest named process */
bool lowest_local_proc = false;
mca_common_sm_module_t *map = NULL;
size_t num_local_procs = 0;
opal_shmem_ds_t *seg_meta = NULL;
if (OMPI_SUCCESS != mca_common_sm_local_proc_reorder(procs,
num_procs,
&num_local_procs)) {
return NULL;
}
/* if there is less than 2 local processes, there's nothing to do. */ /* if there is less than 2 local processes, there's nothing to do. */
if (num_local_procs < 2) { if (num_local_procs < 2) {
@ -270,9 +301,9 @@ mca_common_sm_init(ompi_proc_t **procs,
/* determine whether or not i am the lowest local process */ /* determine whether or not i am the lowest local process */
lowest_local_proc = lowest_local_proc =
(0 == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
ORTE_PROC_MY_NAME, ORTE_PROC_MY_NAME,
&(procs[0]->proc_name))); &(procs[0]->proc_name)));
/* figure out if i am the lowest rank in the group. /* figure out if i am the lowest rank in the group.
* if so, i will create the shared memory backing store * if so, i will create the shared memory backing store
@ -434,4 +465,3 @@ mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module)
} }
return rc; return rc;
} }

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -73,18 +73,31 @@ typedef struct mca_common_sm_module_t {
OBJ_CLASS_DECLARATION(mca_common_sm_module_t); OBJ_CLASS_DECLARATION(mca_common_sm_module_t);
/** /**
* This routine is used to create a shared memory segment (whether * This routine reorders procs array to have all the local procs at the
* it's an mmaped file or a SYSV IPC segment). It is assumed that * beginning and returns the number of local procs through out_num_local_procs.
* The proc with the lowest name is at the beginning of the reordered procs
* array.
*
 * @returnvalue OMPI_SUCCESS on success, an OMPI error code otherwise.
*/
OMPI_DECLSPEC extern int
mca_common_sm_local_proc_reorder(ompi_proc_t **procs,
size_t num_procs,
size_t *out_num_local_procs);
/**
* This routine is used to create and attach to a shared memory segment
* (whether it's an mmaped file or a SYSV IPC segment). It is assumed that
* the shared memory segment does not exist before this call. * the shared memory segment does not exist before this call.
* *
* @returnvalue pointer to control structure at head of shared memory segment. * @returnvalue pointer to control structure at head of shared memory segment.
* Returns NULL if an error occurred. * Returns NULL if an error occurred.
*/ */
mca_common_sm_module_t * OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_module_create(size_t size, mca_common_sm_module_create_and_attach(size_t size,
char *file_name, char *file_name,
size_t size_ctl_structure, size_t size_ctl_structure,
size_t data_seg_alignment); size_t data_seg_alignment);
/** /**
* This routine is used to attach to the shared memory segment associated with * This routine is used to attach to the shared memory segment associated with
@ -96,11 +109,22 @@ mca_common_sm_module_create(size_t size,
* @returnvalue pointer to control structure at head of shared memory segment. * @returnvalue pointer to control structure at head of shared memory segment.
* Returns NULL if an error occurred. * Returns NULL if an error occurred.
*/ */
mca_common_sm_module_t * OMPI_DECLSPEC extern mca_common_sm_module_t *
mca_common_sm_module_attach(opal_shmem_ds_t *seg_meta, mca_common_sm_module_attach(opal_shmem_ds_t *seg_meta,
size_t size_ctl_structure, size_t size_ctl_structure,
size_t data_seg_alignment); size_t data_seg_alignment);
/**
* A thin wrapper around opal_shmem_unlink.
*
 * @param modp points to an initialized mca_common_sm_module_t.
*
* @returnvalue OMPI_SUCCESS if the operation completed successfully,
* OMPI_ERROR otherwise.
*/
OMPI_DECLSPEC extern int
mca_common_sm_module_unlink(mca_common_sm_module_t *modp);
/** /**
* This routine is used to set up a shared memory segment (whether * This routine is used to set up a shared memory segment (whether
* it's an mmaped file or a SYSV IPC segment). It is assumed that * it's an mmaped file or a SYSV IPC segment). It is assumed that
@ -164,7 +188,7 @@ mca_common_sm_init_group(ompi_group_t *group,
*/ */
OMPI_DECLSPEC extern void * OMPI_DECLSPEC extern void *
mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool, mca_common_sm_seg_alloc(struct mca_mpool_base_module_t *mpool,
size_t* size, size_t *size,
mca_mpool_base_registration_t **registration); mca_mpool_base_registration_t **registration);
/** /**
@ -189,4 +213,3 @@ OMPI_DECLSPEC extern mca_common_sm_module_t *mca_common_sm_module;
END_C_DECLS END_C_DECLS
#endif /* _COMMON_SM_H_ */ #endif /* _COMMON_SM_H_ */

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -28,6 +28,7 @@
#include "ompi_config.h" #include "ompi_config.h"
#include "opal/mca/event/event.h" #include "opal/mca/event/event.h"
#include "opal/mca/shmem/shmem.h"
#include "ompi/mca/common/sm/common_sm.h" #include "ompi/mca/common/sm/common_sm.h"
#include "ompi/mca/mpool/mpool.h" #include "ompi/mca/mpool/mpool.h"
@ -36,17 +37,19 @@
BEGIN_C_DECLS BEGIN_C_DECLS
struct mca_mpool_sm_component_t { struct mca_mpool_sm_component_t {
mca_mpool_base_component_t super; mca_mpool_base_component_t super;
/* mca_allocator_base_module_t* sm_allocator; */ /* mca_allocator_base_module_t* sm_allocator; */
char* sm_allocator_name; char *sm_allocator_name;
int verbose; int verbose;
/* struct mca_mpool_sm_mmap_t *sm_mmap; */ /* struct mca_mpool_sm_mmap_t *sm_mmap; */
}; };
typedef struct mca_mpool_sm_component_t mca_mpool_sm_component_t; typedef struct mca_mpool_sm_component_t mca_mpool_sm_component_t;
typedef struct mca_mpool_base_resources_t { typedef struct mca_mpool_base_resources_t {
size_t size; size_t size;
int32_t mem_node; int32_t mem_node;
/* backing store metadata */
opal_shmem_ds_t bs_meta_buf;
} mca_mpool_base_resources_t; } mca_mpool_base_resources_t;
OMPI_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component; OMPI_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component;
@ -54,7 +57,7 @@ OMPI_MODULE_DECLSPEC extern mca_mpool_sm_component_t mca_mpool_sm_component;
typedef struct mca_mpool_sm_module_t { typedef struct mca_mpool_sm_module_t {
mca_mpool_base_module_t super; mca_mpool_base_module_t super;
long sm_size; long sm_size;
mca_allocator_base_module_t * sm_allocator; mca_allocator_base_module_t *sm_allocator;
struct mca_mpool_sm_mmap_t *sm_mmap; struct mca_mpool_sm_mmap_t *sm_mmap;
mca_common_sm_module_t *sm_common_module; mca_common_sm_module_t *sm_common_module;
int32_t mem_node; int32_t mem_node;

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -45,10 +45,14 @@
/* /*
* Local functions * Local functions
*/ */
static int mca_mpool_sm_open(void); static int
static int mca_mpool_sm_close( void ); mca_mpool_sm_open(void);
static mca_mpool_base_module_t* mca_mpool_sm_init(
struct mca_mpool_base_resources_t* resources); static int
mca_mpool_sm_close(void);
static mca_mpool_base_module_t *
mca_mpool_sm_init(struct mca_mpool_base_resources_t* resources);
mca_mpool_sm_component_t mca_mpool_sm_component = { mca_mpool_sm_component_t mca_mpool_sm_component = {
{ {
@ -90,8 +94,8 @@ static int mca_mpool_sm_open(void)
/* register SM component parameters */ /* register SM component parameters */
mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version, mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version,
"allocator", "allocator",
"Name of allocator component to use with sm mpool", "Name of allocator component "
false, false, "to use with sm mpool", false, false,
"bucket", "bucket",
&mca_mpool_sm_component.sm_allocator_name); &mca_mpool_sm_component.sm_allocator_name);
@ -100,18 +104,18 @@ static int mca_mpool_sm_open(void)
* to be set up to 2GB-1 for 32 bit and much greater for 64 bit. */ * to be set up to 2GB-1 for 32 bit and much greater for 64 bit. */
asprintf(&size_str, "%ld", default_min); asprintf(&size_str, "%ld", default_min);
mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version, mca_base_param_reg_string(&mca_mpool_sm_component.super.mpool_version,
"min_size", "min_size",
"Minimum size of the sm mpool shared memory file", "Minimum size of the sm mpool shared memory file",
false, false, size_str, &min_size_param); false, false, size_str, &min_size_param);
free(size_str); free(size_str);
mca_base_param_reg_int(&mca_mpool_sm_component.super.mpool_version, mca_base_param_reg_int(&mca_mpool_sm_component.super.mpool_version,
"verbose", "verbose",
"Enable verbose output for mpool sm component", "Enable verbose output for mpool sm component",
false, false, 0, &value); false, false, 0, &value);
if (value != 0) { if (value != 0) {
mca_mpool_sm_component.verbose = opal_output_open(NULL); mca_mpool_sm_component.verbose = opal_output_open(NULL);
} else { } else {
mca_mpool_sm_component.verbose = -1; mca_mpool_sm_component.verbose = -1;
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -128,41 +132,44 @@ static int mca_mpool_sm_close( void )
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
static mca_mpool_base_module_t* mca_mpool_sm_init( static mca_mpool_base_module_t *
struct mca_mpool_base_resources_t* resources) mca_mpool_sm_init(struct mca_mpool_base_resources_t *resources)
{ {
char *file_name; mca_mpool_sm_module_t *mpool_module;
int len;
mca_mpool_sm_module_t* mpool_module;
mca_allocator_base_component_t* allocator_component; mca_allocator_base_component_t* allocator_component;
long min_size; long min_size;
ompi_proc_t **procs; ompi_proc_t **procs;
size_t num_all_procs, i, num_local_procs = 0; size_t num_all_procs, i, num_local_procs = 0;
/* README: this needs to change if procs in different jobs (even /* README: this needs to change if procs in different jobs (even
spawned ones) are to talk using shared memory */ * spawned ones) are to talk using shared memory */
procs = ompi_proc_world(&num_all_procs); if (NULL == (procs = ompi_proc_world(&num_all_procs))) {
/* out of resources, so just bail */
return NULL;
}
for (i = 0 ; i < num_all_procs ; ++i) { for (i = 0 ; i < num_all_procs ; ++i) {
if (OPAL_PROC_ON_LOCAL_NODE(procs[i]->proc_flags)) { if (OPAL_PROC_ON_LOCAL_NODE(procs[i]->proc_flags)) {
num_local_procs++; num_local_procs++;
} }
} }
/* parse the min size and validate it */ /* parse the min size and validate it */
/* if other parameters are added, absolutely necessary to reset errno each time */ /* if other parameters are added, absolutely
* necessary to reset errno each time */
errno = 0; errno = 0;
min_size = strtol(min_size_param, (char **)NULL, 10); min_size = strtol(min_size_param, (char **)NULL, 10);
if (errno == ERANGE) { if (errno == ERANGE) {
opal_output(0, "mca_mpool_sm_init: min_size overflows! set to default (%ld)", default_min); opal_output(0, "mca_mpool_sm_init: min_size overflows! "
"set to default (%ld)", default_min);
min_size = default_min; min_size = default_min;
} else if (errno == EINVAL) { } else if (errno == EINVAL) {
opal_output(0, "mca_mpool_sm_init: invalid min_size entered. set it to (%ld)", default_min); opal_output(0, "mca_mpool_sm_init: invalid min_size entered. "
"set it to (%ld)", default_min);
min_size = default_min; min_size = default_min;
} }
/* Make a new mpool module */ /* Make a new mpool module */
mpool_module = mpool_module =
(mca_mpool_sm_module_t*)malloc(sizeof(mca_mpool_sm_module_t)); (mca_mpool_sm_module_t *)malloc(sizeof(mca_mpool_sm_module_t));
mca_mpool_sm_module_init(mpool_module); mca_mpool_sm_module_init(mpool_module);
/* set sm_size */ /* set sm_size */
@ -173,23 +180,26 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
mpool_module->sm_size = min_size; mpool_module->sm_size = min_size;
} }
/* add something for the control structure */
mpool_module->sm_size += sizeof(mca_common_sm_module_t);
allocator_component = mca_allocator_component_lookup( allocator_component = mca_allocator_component_lookup(
mca_mpool_sm_component.sm_allocator_name); mca_mpool_sm_component.sm_allocator_name);
/* if specified allocator cannot be loaded - look for an alternative */ /* if specified allocator cannot be loaded - look for an alternative */
if(NULL == allocator_component) { if (NULL == allocator_component) {
if(opal_list_get_size(&mca_allocator_base_components) == 0) { if (opal_list_get_size(&mca_allocator_base_components) == 0) {
mca_base_component_list_item_t* item = (mca_base_component_list_item_t*) mca_base_component_list_item_t *item =
(mca_base_component_list_item_t *)
opal_list_get_first(&mca_allocator_base_components); opal_list_get_first(&mca_allocator_base_components);
allocator_component = (mca_allocator_base_component_t*)item->cli_component; allocator_component =
opal_output(0, "mca_mpool_sm_init: unable to locate allocator: %s - using %s\n", (mca_allocator_base_component_t *)item->cli_component;
mca_mpool_sm_component.sm_allocator_name, allocator_component->allocator_version.mca_component_name); opal_output(
0, "mca_mpool_sm_init: "
"unable to locate allocator: %s - using %s\n",
mca_mpool_sm_component.sm_allocator_name,
allocator_component->allocator_version.mca_component_name);
} else { } else {
opal_output(0, "mca_mpool_sm_init: unable to locate allocator: %s\n", opal_output(0, "mca_mpool_sm_init: "
mca_mpool_sm_component.sm_allocator_name); "unable to locate allocator: %s\n",
mca_mpool_sm_component.sm_allocator_name);
free(procs); free(procs);
return NULL; return NULL;
} }
@ -197,41 +207,28 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
mpool_module->mem_node = resources->mem_node; mpool_module->mem_node = resources->mem_node;
/* create initial shared memory mapping */
len = asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
orte_process_info.job_session_dir,
orte_process_info.nodename );
if ( 0 > len ) {
free(mpool_module);
free(procs);
return NULL;
}
opal_output(mca_mpool_sm_component.verbose, opal_output(mca_mpool_sm_component.verbose,
"mca_mpool_sm_init: shared memory size used: (%ld)", "mca_mpool_sm_init: shared memory size used: (%ld)",
mpool_module->sm_size); mpool_module->sm_size);
if (NULL == (mpool_module->sm_common_module = if (NULL == (mpool_module->sm_common_module =
mca_common_sm_init(procs, num_all_procs, mca_common_sm_module_attach(&resources->bs_meta_buf,
mpool_module->sm_size,
file_name,
sizeof(mca_common_sm_module_t), 8))) { sizeof(mca_common_sm_module_t), 8))) {
opal_output(mca_mpool_sm_component.verbose, opal_output(mca_mpool_sm_component.verbose, "mca_mpool_sm_init: "
"mca_mpool_sm_init: unable to create shared memory mapping (%s)", file_name); "unable to create shared memory mapping (%s)",
free(file_name); resources->bs_meta_buf.seg_name);
free(mpool_module); free(mpool_module);
free(procs); free(procs);
return NULL; return NULL;
} }
free(procs); free(procs);
free(file_name);
/* setup allocator */ /* setup allocator */
mpool_module->sm_allocator = mpool_module->sm_allocator =
allocator_component->allocator_init(true, allocator_component->allocator_init(true,
mca_common_sm_seg_alloc, mca_common_sm_seg_alloc,
NULL, &(mpool_module->super)); NULL, &(mpool_module->super));
if(NULL == mpool_module->sm_allocator) { if (NULL == mpool_module->sm_allocator) {
opal_output(0, "mca_mpool_sm_init: unable to initialize allocator"); opal_output(0, "mca_mpool_sm_init: unable to initialize allocator");
free(mpool_module); free(mpool_module);
return NULL; return NULL;

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
@ -115,13 +115,12 @@ opal_shmem_mmap_module_t opal_shmem_mmap_module = {
static inline void static inline void
shmem_ds_reset(opal_shmem_ds_t *ds_buf) shmem_ds_reset(opal_shmem_ds_t *ds_buf)
{ {
/* don't print ds_buf info here, as we may be printing garbage. */
OPAL_OUTPUT_VERBOSE( OPAL_OUTPUT_VERBOSE(
(70, opal_shmem_base_output, (70, opal_shmem_base_output,
"%s: %s: shmem_ds_resetting " "%s: %s: shmem_ds_resetting\n",
"(id: %d, size: %lu, name: %s)\n",
mca_shmem_mmap_component.super.base_version.mca_type_name, mca_shmem_mmap_component.super.base_version.mca_type_name,
mca_shmem_mmap_component.super.base_version.mca_component_name, mca_shmem_mmap_component.super.base_version.mca_component_name)
ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
ds_buf->seg_cpid = 0; ds_buf->seg_cpid = 0;

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
@ -108,13 +108,12 @@ opal_shmem_posix_module_t opal_shmem_posix_module = {
static inline void static inline void
shmem_ds_reset(opal_shmem_ds_t *ds_buf) shmem_ds_reset(opal_shmem_ds_t *ds_buf)
{ {
/* don't print ds_buf info here, as we may be printing garbage. */
OPAL_OUTPUT_VERBOSE( OPAL_OUTPUT_VERBOSE(
(70, opal_shmem_base_output, (70, opal_shmem_base_output,
"%s: %s: shmem_ds_resetting " "%s: %s: shmem_ds_resetting\n",
"(id: %d, size: %lu, name: %s)\n",
mca_shmem_posix_component.super.base_version.mca_type_name, mca_shmem_posix_component.super.base_version.mca_type_name,
mca_shmem_posix_component.super.base_version.mca_component_name, mca_shmem_posix_component.super.base_version.mca_component_name)
ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
ds_buf->seg_cpid = 0; ds_buf->seg_cpid = 0;

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
@ -113,13 +113,12 @@ opal_shmem_sysv_module_t opal_shmem_sysv_module = {
static inline void static inline void
shmem_ds_reset(opal_shmem_ds_t *ds_buf) shmem_ds_reset(opal_shmem_ds_t *ds_buf)
{ {
/* don't print ds_buf info here, as we may be printing garbage. */
OPAL_OUTPUT_VERBOSE( OPAL_OUTPUT_VERBOSE(
(70, opal_shmem_base_output, (70, opal_shmem_base_output,
"%s: %s: shmem_ds_resetting " "%s: %s: shmem_ds_resetting\n",
"(id: %d, size: %lu, name: %s)\n",
mca_shmem_sysv_component.super.base_version.mca_type_name, mca_shmem_sysv_component.super.base_version.mca_type_name,
mca_shmem_sysv_component.super.base_version.mca_component_name, mca_shmem_sysv_component.super.base_version.mca_component_name)
ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
); );
ds_buf->seg_cpid = 0; ds_buf->seg_cpid = 0;

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Los Alamos National Security, LLC. * Copyright (c) 2010-2012 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* *
* $COPYRIGHT$ * $COPYRIGHT$
@ -107,13 +107,12 @@ opal_shmem_windows_module_t opal_shmem_windows_module = {
static inline void static inline void
shmem_ds_reset(opal_shmem_ds_t *ds_buf) shmem_ds_reset(opal_shmem_ds_t *ds_buf)
{ {
/* don't print ds_buf info here, as we may be printing garbage. */
OPAL_OUTPUT_VERBOSE( OPAL_OUTPUT_VERBOSE(
(70, opal_shmem_base_output, (70, opal_shmem_base_output,
"%s: %s: shmem_ds_resetting " "%s: %s: shmem_ds_resetting\n",
"(id: %d, size: %"PRIsize_t", name: %s)\n",
mca_shmem_windows_component.super.base_version.mca_type_name, mca_shmem_windows_component.super.base_version.mca_type_name,
mca_shmem_windows_component.super.base_version.mca_component_name, mca_shmem_windows_component.super.base_version.mca_component_name)
ds_buf->seg_id, ds_buf->seg_size, ds_buf->seg_name)
); );
ds_buf->seg_cpid = 0; ds_buf->seg_cpid = 0;