diff --git a/ompi/mca/btl/sm/btl_sm.c b/ompi/mca/btl/sm/btl_sm.c index 0adff49cf9..30adbac113 100644 --- a/ompi/mca/btl/sm/btl_sm.c +++ b/ompi/mca/btl/sm/btl_sm.c @@ -227,11 +227,6 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) mca_btl_sm_component.sm_mpool_base = mca_btl_sm_component.sm_mpools[0]->mpool_base(mca_btl_sm_component.sm_mpools[0]); - /* set the shared memory offset */ - mca_btl_sm_component.sm_offset = (ptrdiff_t*)calloc(n, sizeof(ptrdiff_t)); - if(NULL == mca_btl_sm_component.sm_offset) - return OMPI_ERR_OUT_OF_RESOURCE; - /* create a list of peers */ mca_btl_sm_component.sm_peers = (struct mca_btl_base_endpoint_t**) calloc(n, sizeof(struct mca_btl_base_endpoint_t*)); @@ -277,7 +272,7 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) return OMPI_ERROR; } - mca_btl_sm_component.shm_fifo = (sm_fifo_t **)mca_btl_sm_component.mmap_file->data_addr; + mca_btl_sm_component.shm_fifo = (volatile sm_fifo_t **)mca_btl_sm_component.mmap_file->data_addr; mca_btl_sm_component.shm_bases = (char**)(mca_btl_sm_component.shm_fifo + n); mca_btl_sm_component.shm_mem_nodes = (uint16_t*)(mca_btl_sm_component.shm_bases + n); @@ -293,8 +288,6 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n) mca_btl_sm_component.shm_fifo[mca_btl_sm_component.my_smp_rank] = my_fifos; - opal_atomic_wmb(); - /* cache the pointer to the 2d fifo array. These addresses * are valid in the current process space */ mca_btl_sm_component.fifo = (sm_fifo_t**)malloc(sizeof(sm_fifo_t*) * n); @@ -499,14 +492,16 @@ int mca_btl_sm_add_procs( goto CLEANUP; } + opal_atomic_wmb(); + /* Sync with other local procs. Force the FIFO initialization to always * happens before the readers access it. */ opal_atomic_add_32( &mca_btl_sm_component.mmap_file->map_seg->seg_inited, 1); while( n_local_procs > mca_btl_sm_component.mmap_file->map_seg->seg_inited) { - opal_atomic_rmb(); opal_progress(); + opal_atomic_rmb(); } /* coordinate with other processes */ @@ -516,14 +511,14 @@ int mca_btl_sm_add_procs( /* spin until this element is allocated */ /* doesn't really wait for that process... FIFO might be allocated, but not initialized */ + opal_atomic_rmb(); while(NULL == mca_btl_sm_component.shm_fifo[j]) { - opal_atomic_rmb(); opal_progress(); + opal_atomic_rmb(); } /* Calculate the difference as (my_base - their_base) */ diff = ADDR2OFFSET(bases[my_smp_rank], bases[j]); - mca_btl_sm_component.sm_offset[j] = diff; /* store local address of remote fifos */ mca_btl_sm_component.fifo[j] = diff --git a/ompi/mca/btl/sm/btl_sm.h b/ompi/mca/btl/sm/btl_sm.h index ddf4ef44f5..c4e101f907 100644 --- a/ompi/mca/btl/sm/btl_sm.h +++ b/ompi/mca/btl/sm/btl_sm.h @@ -134,7 +134,7 @@ struct mca_btl_sm_component_t { mca_common_sm_mmap_t *mmap_file; /**< description of mmap'ed file */ mca_common_sm_file_header_t *sm_ctl_header; /* control header in shared memory */ - sm_fifo_t **shm_fifo; /**< pointer to fifo 2D array in shared memory */ + volatile sm_fifo_t **shm_fifo; /**< pointer to fifo 2D array in shared memory */ char **shm_bases; /**< pointer to base pointers in shared memory */ uint16_t *shm_mem_nodes; /**< pointer to mem noded in shared memory */ sm_fifo_t **fifo; /**< cached copy of the pointer to the 2D @@ -146,8 +146,6 @@ struct mca_btl_sm_component_t { size_t fifo_size; /**< number of FIFO queue entries */ size_t fifo_lazy_free; /**< number of reads before lazy fifo free is triggered */ int nfifos; /**< number of FIFOs per receiver */ - ptrdiff_t *sm_offset; /**< offset to be applied to shared memory - addresses, per local process value */ int32_t num_smp_procs; /**< current number of smp procs on this host */ int32_t my_smp_rank; /**< My SMP process rank. Used for accessing * SMP specfic data structures. */