1
1

Fix for a bug Galen noticed yesterday -- make the shared memory only
be allocated the first time an sm coll is selected for a communicator,
not before.

This commit was SVN r7647.
Этот коммит содержится в:
Jeff Squyres 2005-10-06 13:17:27 +00:00
родитель 1fe18814da
Коммит b22fab2826
3 изменённых файлов: 81 добавлений и 27 удалений

Просмотреть файл

@ -37,7 +37,7 @@
#define D(foo) #define D(foo)
#endif #endif
#if 0 #if OMPI_ENABLE_DEBUG
#include <sched.h> #include <sched.h>
#define SPIN sched_yield() #define SPIN sched_yield()
#else #else
@ -190,6 +190,21 @@ extern "C" {
the division once and then just use the value without the division once and then just use the value without
having to re-calculate. */ having to re-calculate. */
int sm_segs_per_inuse_flag; int sm_segs_per_inuse_flag;
/** Whether the component's shared memory has been [lazily]
initialized or not */
bool sm_component_setup;
/** Once the component has been lazily initialized, keep the
state of it around */
bool sm_component_setup_success;
/** A lock protecting the lazy initialization of the component
(SINCE THERE IS NO STATIC INITIALIZER FOR
opal_atomic_lock_t, THIS *MUST* BE THE LAST MEMBER OF THE
STRUCT!) */
opal_atomic_lock_t sm_component_setup_lock;
}; };
/** /**
* Convenience typedef * Convenience typedef

Просмотреть файл

@ -133,7 +133,11 @@ mca_coll_sm_component_t mca_coll_sm_component = {
0, /* mpool data size -- filled in below */ 0, /* mpool data size -- filled in below */
NULL, /* data mpool pointer */ NULL, /* data mpool pointer */
false, /* whether this process created the data mpool */ false, /* whether this process created the data mpool */
NULL /* pointer to meta data about bootstrap area */ NULL, /* pointer to meta data about bootstrap area */
false, /* whether the component sm has been [lazily] inited or not */
false /* whether lazy init was successful or not */
/* the lock for lazy initialization is not initialized here --
there is no static initializer for opal_atomic_lock_t */
}; };

Просмотреть файл

@ -110,7 +110,6 @@ static const mca_coll_base_module_1_0_0_t module = {
int mca_coll_sm_init_query(bool enable_progress_threads, int mca_coll_sm_init_query(bool enable_progress_threads,
bool enable_mpi_threads) bool enable_mpi_threads)
{ {
int ret;
#if 0 #if 0
/* JMS: Arrgh. Unfortunately, we don't have this information by /* JMS: Arrgh. Unfortunately, we don't have this information by
the time this is invoked -- the GPR compound command doesn't the time this is invoked -- the GPR compound command doesn't
@ -132,29 +131,12 @@ int mca_coll_sm_init_query(bool enable_progress_threads,
free(procs); free(procs);
#endif #endif
/* Ok, we have local peers. So setup the bootstrap file */ /* Don't do much here because we don't really want to allocate any
shared memory until this component is selected to be used. */
if (OMPI_SUCCESS != (ret = bootstrap_init())) {
return ret;
}
/* Can we get an mpool allocation? See if there was one created
already. If not, try to make one. */
mca_coll_sm_component.sm_data_mpool =
mca_mpool_base_module_lookup(mca_coll_sm_component.sm_mpool_name);
if (NULL == mca_coll_sm_component.sm_data_mpool) {
mca_coll_sm_component.sm_data_mpool =
mca_mpool_base_module_create(mca_coll_sm_component.sm_mpool_name,
NULL, NULL);
if (NULL == mca_coll_sm_component.sm_data_mpool) {
mca_coll_sm_bootstrap_finalize();
return OMPI_ERR_OUT_OF_RESOURCE;
}
mca_coll_sm_component.sm_data_mpool_created = true;
} else {
mca_coll_sm_component.sm_data_mpool_created = false; mca_coll_sm_component.sm_data_mpool_created = false;
} mca_coll_sm_component.sm_component_setup = false;
opal_atomic_init(&mca_coll_sm_component.sm_component_setup_lock, 0);
/* Alles gut */ /* Alles gut */
@ -171,6 +153,13 @@ const mca_coll_base_module_1_0_0_t *
mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority, mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority,
struct mca_coll_base_comm_t **data) struct mca_coll_base_comm_t **data)
{ {
/* See if someone has previously lazily initialized and failed */
if (mca_coll_sm_component.sm_component_setup &&
!mca_coll_sm_component.sm_component_setup_success) {
return NULL;
}
/* If we're intercomm, or if there's only one process in the /* If we're intercomm, or if there's only one process in the
communicator, or if not all the processes in the communicator communicator, or if not all the processes in the communicator
are on this node, then we don't want to run */ are on this node, then we don't want to run */
@ -227,6 +216,51 @@ sm_module_init(struct ompi_communicator_t *comm)
char *base; char *base;
const int num_barrier_buffers = 2; const int num_barrier_buffers = 2;
/* Once-per-component setup. This may happen at any time --
during MPI_INIT or later. So we must protect this with locks
to ensure that only one thread in the process actually does
this setup. */
opal_atomic_lock(&mca_coll_sm_component.sm_component_setup_lock);
if (!mca_coll_sm_component.sm_component_setup) {
mca_coll_sm_component.sm_component_setup = true;
if (OMPI_SUCCESS != bootstrap_init()) {
mca_coll_sm_component.sm_component_setup_success = false;
opal_atomic_unlock(&mca_coll_sm_component.sm_component_setup_lock);
return NULL;
}
/* Can we get an mpool allocation? See if there was one created
already. If not, try to make one. */
mca_coll_sm_component.sm_data_mpool =
mca_mpool_base_module_lookup(mca_coll_sm_component.sm_mpool_name);
if (NULL == mca_coll_sm_component.sm_data_mpool) {
mca_coll_sm_component.sm_data_mpool =
mca_mpool_base_module_create(mca_coll_sm_component.sm_mpool_name,
NULL, NULL);
if (NULL == mca_coll_sm_component.sm_data_mpool) {
mca_coll_sm_bootstrap_finalize();
mca_coll_sm_component.sm_component_setup_success = false;
opal_atomic_unlock(&mca_coll_sm_component.sm_component_setup_lock);
return NULL;
}
mca_coll_sm_component.sm_data_mpool_created = true;
} else {
mca_coll_sm_component.sm_data_mpool_created = false;
}
mca_coll_sm_component.sm_component_setup_success = true;
}
opal_atomic_unlock(&mca_coll_sm_component.sm_component_setup_lock);
/* Double check to see if some interleaved lazy init failed before
we got in here */
if (!mca_coll_sm_component.sm_component_setup_success) {
return NULL;
}
/* Get some space to setup memory affinity (just easier to try to /* Get some space to setup memory affinity (just easier to try to
alloc here to handle the error case) */ alloc here to handle the error case) */
@ -727,8 +761,9 @@ static int bootstrap_comm(ompi_communicator_t *comm)
/* /*
* This function is not static and has a prefix-rule-enabled name * This function is not static and has a prefix-rule-enabled name
* because it gets called from the component. This is only called * because it gets called from the component (but may also be called
* once -- no need for reference counting or thread protection. * from above). This is only called once -- no need for reference
* counting or thread protection.
*/ */
int mca_coll_sm_bootstrap_finalize(void) int mca_coll_sm_bootstrap_finalize(void)
{ {