Get rid of the separate sm BTL for different shared memory base addresses. Now
that we precalculate most of the addresses, there is no point in having a separate BTL for this. The sm_progress() code becomes much simpler as a result. This commit was SVN r14071.
This commit is contained in:
parent 803655b555
commit e551c5f1a3
@@ -82,38 +82,8 @@
  * cases, the receiver doesn't have to do anything.
  */
 
-mca_btl_sm_t mca_btl_sm[2] = {
+mca_btl_sm_t mca_btl_sm = {
     {
-        {
-            &mca_btl_sm_component.super,
-            0, /* btl_eager_limit */
-            0, /* btl_min_send_size */
-            0, /* btl_max_send_size */
-            0, /* btl_min_rdma_size */
-            0, /* btl_max_rdma_size */
-            0, /* btl_exclusivity */
-            0, /* btl_latency */
-            0, /* btl_bandwidth */
-            0, /* btl flags */
-            mca_btl_sm_add_procs_same_base_addr,
-            mca_btl_sm_del_procs,
-            mca_btl_sm_register,
-            mca_btl_sm_finalize,
-            mca_btl_sm_alloc,
-            mca_btl_sm_free,
-            mca_btl_sm_prepare_src,
-            NULL,
-            mca_btl_sm_send,
-            NULL, /* put */
-            NULL, /* get */
-            mca_btl_base_dump,
-            NULL, /* mpool */
-            mca_btl_sm_register_error_cb, /* register error */
-            mca_btl_sm_ft_event
-        }
-    },
-    {
-        {
         &mca_btl_sm_component.super,
         0, /* btl_eager_limit */
         0, /* btl_min_send_size */
@@ -128,26 +98,35 @@ mca_btl_sm_t mca_btl_sm[2] = {
         mca_btl_sm_del_procs,
         mca_btl_sm_register,
         mca_btl_sm_finalize,
         mca_btl_sm_alloc,
         mca_btl_sm_free,
         mca_btl_sm_prepare_src,
         NULL,
         mca_btl_sm_send,
-        NULL, /* put function */
-        NULL, /* get function */
+        NULL, /* put */
+        NULL, /* get */
         mca_btl_base_dump,
         NULL, /* mpool */
         mca_btl_sm_register_error_cb, /* register error */
         mca_btl_sm_ft_event
-        }
     }
 };
 
 /* track information needed to synchronise a Shared Memory BTL module */
 mca_btl_sm_module_resource_t mca_btl_sm_module_resource;
 
+/*
+ * calculate offset of an address from the beginning of a shared memory segment
+ */
+#define ADDR2OFFSET(ADDR, BASE) ((char*)(ADDR) - (char*)(BASE))
 
-int mca_btl_sm_add_procs_same_base_addr(
+/*
+ * calculate an absolute address in a local address space given an offset and
+ * a base address of a shared memory segment
+ */
+#define OFFSET2ADDR(OFFSET, BASE) ((ptrdiff_t)(OFFSET) + (char*)(BASE))
+
+int mca_btl_sm_add_procs(
     struct mca_btl_base_module_t* btl,
     size_t nprocs,
     struct ompi_proc_t **procs,
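For illustration only, not part of the commit: the two macros above are the whole trick. A minimal, self-contained sketch, where one process maps the same POSIX shm segment twice to stand in for two processes that attach it at different virtual addresses; the segment name and sizes are made up, error handling is trimmed (link with -lrt on older glibc):

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#define ADDR2OFFSET(ADDR, BASE) ((char*)(ADDR) - (char*)(BASE))
#define OFFSET2ADDR(OFFSET, BASE) ((ptrdiff_t)(OFFSET) + (char*)(BASE))

int main(void)
{
    int fd = shm_open("/sm_demo", O_CREAT | O_RDWR, 0600);
    ftruncate(fd, 4096);

    /* two views of the same segment; the kernel normally picks
     * different virtual addresses for them */
    char *base_a = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    char *base_b = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    char *msg = base_a + 100;                  /* "sender" writes here */
    strcpy(msg, "hello");

    ptrdiff_t off = ADDR2OFFSET(msg, base_a);  /* pointer -> segment offset */
    char *same = OFFSET2ADDR(off, base_b);     /* offset -> "receiver" pointer */

    printf("base a=%p, base b=%p, offset %td reads: %s\n",
           (void*)base_a, (void*)base_b, off, same);   /* prints "hello" */

    shm_unlink("/sm_demo");
    return 0;
}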
@@ -155,35 +134,25 @@ int mca_btl_sm_add_procs_same_base_addr(
     ompi_bitmap_t* reachability)
 {
     int return_code = OMPI_SUCCESS, cnt, len;
-    size_t i, j, size, n_to_allocate, length;
-    int32_t n_local_procs, proc;
+    size_t size, length;
+    int32_t n_local_procs, proc, j, n_to_allocate, i;
     ompi_proc_t* my_proc; /* pointer to caller's proc structure */
     mca_btl_sm_t *btl_sm;
     ompi_fifo_t *my_fifos;
     ompi_fifo_t * volatile *fifo_tmp;
-    bool same_sm_base;
     ptrdiff_t diff;
     volatile char **tmp_ptr;
     volatile int *tmp_int_ptr;
     bool have_connected_peer = false;
 
     /* initializion */
-    for( i = 0 ; i < nprocs ; i++ ) {
-        peers[i] = NULL;
-    }
     btl_sm=(mca_btl_sm_t *)btl;
 
     /* allocate array to hold setup shared memory from all
      * other procs */
-    mca_btl_sm_component.sm_proc_connect=(int *) malloc(nprocs*sizeof(int));
+    mca_btl_sm_component.sm_proc_connect=(int *)malloc(nprocs*sizeof(int));
     if( NULL == mca_btl_sm_component.sm_proc_connect ){
-        return_code=OMPI_ERR_OUT_OF_RESOURCE;
-        goto CLEANUP;
-    }
-
-    /* initialize and sm_proc_connect*/
-    for( proc = 0 ; proc < (int32_t)nprocs ; proc++ ) {
-        mca_btl_sm_component.sm_proc_connect[proc] = 0;
+        return OMPI_ERR_OUT_OF_RESOURCE;
     }
 
     /* get pointer to my proc structure */
@@ -227,7 +196,7 @@
         return_code=OMPI_ERR_OUT_OF_RESOURCE;
         goto CLEANUP;
     }
-    peer->peer_smp_rank=n_local_procs+
+    peer->peer_smp_rank = n_local_procs +
         mca_btl_sm_component.num_smp_procs;
 
 #if OMPI_ENABLE_PROGRESS_THREADS == 1
@@ -268,7 +237,7 @@
     }
 
     /* make sure that my_smp_rank has been defined */
-    if( 0xFFFFFFFF == mca_btl_sm_component.my_smp_rank ) {
+    if( -1 == mca_btl_sm_component.my_smp_rank ) {
         return_code=OMPI_ERROR;
         goto CLEANUP;
     }
@@ -290,7 +259,7 @@
 
     /* make sure n_to_allocate is greater than 0 */
 
-    if ( !mca_btl_sm[0].btl_inited ) {
+    if ( !mca_btl_sm.btl_inited ) {
         /* set the shared memory offset */
         mca_btl_sm_component.sm_offset=(ptrdiff_t*)
             malloc(n_to_allocate*sizeof(ptrdiff_t));
@@ -324,7 +293,7 @@
     /*
      * Create backing file - only first time through
      */
-    if ( !mca_btl_sm[0].btl_inited ) {
+    if ( !mca_btl_sm.btl_inited ) {
         /* set file name */
         len=asprintf(&(mca_btl_sm_component.sm_resouce_ctl_file),
             "%s"OPAL_PATH_SEP"shared_mem_btl_module.%s",orte_process_info.job_session_dir,
@@ -363,7 +332,8 @@
     /* check to make sure number of local procs is within the
      * specified limits */
     if( ( 0 < mca_btl_sm_component.sm_max_procs ) &&
-        ( n_local_procs > mca_btl_sm_component.sm_max_procs) ) {
+        ( mca_btl_sm_component.num_smp_procs + n_local_procs >
+          mca_btl_sm_component.sm_max_procs) ) {
         return_code=OMPI_ERROR;
         goto CLEANUP;
     }
@@ -379,10 +349,6 @@
         return_code=OMPI_ERR_OUT_OF_RESOURCE;
         goto CLEANUP;
     }
-    /* initiazlize the pointer array */
-    for(i=0 ; i < n_to_allocate ; i++ ) {
-        mca_btl_sm_component.sm_ctl_header->fifo[i]=NULL;
-    }
 
     /* allocate and initialize the array to hold the virtual address
      * of the shared memory base */
@@ -395,11 +361,6 @@
         return_code=OMPI_ERR_OUT_OF_RESOURCE;
         goto CLEANUP;
     }
-    /* initialize the pointer array */
-    for(i=0 ; i < n_to_allocate ; i++ ) {
-        mca_btl_sm_component.sm_ctl_header->segment_header.
-            base_shared_mem_segment[i]=NULL;
-    }
 
     /* allocate and initialize the array of flags indicating
      * when the virtual address of the shared memory address
@@ -416,26 +377,28 @@
         for(i=0 ; i < n_to_allocate ; i++ ) {
             mca_btl_sm_component.sm_ctl_header->segment_header.
                 base_shared_mem_flags[i]=0;
+            mca_btl_sm_component.sm_ctl_header->fifo[i]=NULL;
+            mca_btl_sm_component.sm_ctl_header->segment_header.
+                base_shared_mem_segment[i]=NULL;
         }
 
         /* set the addresses to be a relative, so that
          * they can be used by other procs */
-        mca_btl_sm_component.sm_ctl_header->fifo=
-            (volatile ompi_fifo_t **)
-            ( (char *)(mca_btl_sm_component.sm_ctl_header->fifo)-
-            (char *)(mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool)) );
+        mca_btl_sm_component.sm_ctl_header->fifo = (volatile ompi_fifo_t **)
+            ADDR2OFFSET(mca_btl_sm_component.sm_ctl_header->fifo,
+                        mca_btl_sm_component.sm_mpool_base);
 
         mca_btl_sm_component.sm_ctl_header->segment_header.
-            base_shared_mem_segment=( volatile char **)
-            ( (char *)(mca_btl_sm_component.sm_ctl_header->
-            segment_header.base_shared_mem_segment) -
-            (char *)(mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool)) );
+            base_shared_mem_segment = (volatile char **)
+            ADDR2OFFSET(mca_btl_sm_component.sm_ctl_header->
+                        segment_header.base_shared_mem_segment,
+                        mca_btl_sm_component.sm_mpool_base);
 
         mca_btl_sm_component.sm_ctl_header->segment_header.
             base_shared_mem_flags = (volatile int *)
-            ( ((char *) mca_btl_sm_component.sm_ctl_header->
-            segment_header.base_shared_mem_flags) -
-            (char *) (mca_btl_sm_component.sm_mpool_base));
+            ADDR2OFFSET(mca_btl_sm_component.sm_ctl_header->segment_header.
+                        base_shared_mem_flags,
+                        mca_btl_sm_component.sm_mpool_base);
 
         /* allow other procs to use this shared memory map */
         mca_btl_sm_component.mmap_file->map_seg->seg_inited=true;
@@ -458,20 +421,20 @@
         /* set the base of the shared memory segment, and flag
          * indicating that it is set */
         tmp_ptr=(volatile char **)
-            ( (char *)(mca_btl_sm_component.sm_ctl_header->segment_header.
-            base_shared_mem_segment) +
-            (long )(mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool)) );
-        tmp_ptr[mca_btl_sm_component.my_smp_rank]=(char*)
-            mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool);
+            OFFSET2ADDR(mca_btl_sm_component.sm_ctl_header->segment_header.
+                        base_shared_mem_segment,
+                        mca_btl_sm_component.sm_mpool_base);
+        tmp_ptr[mca_btl_sm_component.my_smp_rank] =
+            (char*)mca_btl_sm_component.sm_mpool_base;
 
         /* memory barrier to ensure this flag is set before other
          * flags are set */
         opal_atomic_mb();
 
         /* Set my flag to 1 (convert from relative address first) */
         tmp_int_ptr=(volatile int *)
-            ( ((char *) mca_btl_sm_component.sm_ctl_header->segment_header.
-            base_shared_mem_flags) +
-            ((long) mca_btl_sm_component.sm_mpool_base));
+            OFFSET2ADDR(mca_btl_sm_component.sm_ctl_header->segment_header.
+                        base_shared_mem_flags, mca_btl_sm_component.sm_mpool_base);
         tmp_int_ptr[mca_btl_sm_component.my_smp_rank]=1;
 
         /*
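For illustration, not from the commit: the store/barrier/flag sequence above is a standard publish pattern; a peer that observes the flag set must also observe the base address stored before the barrier. A sketch with C11 atomics standing in for opal_atomic_mb(); the directory type and the bound of 64 are invented for the example:

#include <stdatomic.h>

#define MAX_SMP_PROCS 64                     /* arbitrary bound for the sketch */

struct seg_directory {
    char *volatile base[MAX_SMP_PROCS];      /* each rank's base address */
    atomic_int flag[MAX_SMP_PROCS];          /* 1 once base[i] is valid */
};

void publish_base(struct seg_directory *dir, int my_rank, char *my_base)
{
    dir->base[my_rank] = my_base;
    atomic_thread_fence(memory_order_seq_cst);  /* plays opal_atomic_mb() */
    atomic_store(&dir->flag[my_rank], 1);
}

char *wait_for_base(struct seg_directory *dir, int rank)
{
    while (0 == atomic_load(&dir->flag[rank]))
        ;                                       /* spin until published */
    return dir->base[rank];
}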
@@ -493,7 +456,7 @@
         char *buf;
         my_fifos[j].head = (ompi_cb_fifo_wrapper_t*)OMPI_CB_FREE;
         my_fifos[j].tail = (ompi_cb_fifo_wrapper_t*)OMPI_CB_FREE;
         if(opal_using_threads()) {
             /* allocate head and tail locks on different cache lines */
             buf = (char*)mca_btl_sm_component.sm_mpool->mpool_alloc(
                 mca_btl_sm_component.sm_mpool,
@@ -507,14 +470,14 @@
                 CACHE_LINE_SIZE);
             opal_atomic_init(my_fifos[j].head_lock, OPAL_ATOMIC_UNLOCKED);
             opal_atomic_init(my_fifos[j].tail_lock, OPAL_ATOMIC_UNLOCKED);
         } else {
             my_fifos[j].head_lock = NULL;
             my_fifos[j].tail_lock = NULL;
         }
     }
-    fifo_tmp=(ompi_fifo_t * volatile *)
-        ( (char *)(mca_btl_sm_component.sm_ctl_header->fifo) +
-        (long)(mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool)) );
+    fifo_tmp = (ompi_fifo_t * volatile *)
+        OFFSET2ADDR(mca_btl_sm_component.sm_ctl_header->fifo,
+                    mca_btl_sm_component.sm_mpool_base);
     fifo_tmp[mca_btl_sm_component.my_smp_rank]=my_fifos;
     opal_atomic_mb();
 
@@ -529,14 +492,13 @@
         mca_btl_sm_component.fifo[mca_btl_sm_component.my_smp_rank]=my_fifos;
     }
 
-    /* cache the pointers to the rest of the fifo arrays */
-    fifo_tmp=(ompi_fifo_t * volatile *)
-        ( (char *)(mca_btl_sm_component.sm_ctl_header->fifo) +
-        (long)(mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool)) );
+    fifo_tmp = (ompi_fifo_t * volatile *)
+        OFFSET2ADDR(mca_btl_sm_component.sm_ctl_header->fifo,
+                    mca_btl_sm_component.sm_mpool_base);
     tmp_ptr=(volatile char **)
-        ( (char *)mca_btl_sm_component.sm_ctl_header->
-        segment_header.base_shared_mem_segment +
-        (long)mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool));
+        OFFSET2ADDR(mca_btl_sm_component.sm_ctl_header->segment_header.
+                    base_shared_mem_segment,
+                    mca_btl_sm_component.sm_mpool_base);
     for( j=mca_btl_sm_component.num_smp_procs ; j <
             mca_btl_sm_component.num_smp_procs+n_local_procs ; j++ ) {
 
@@ -548,13 +510,12 @@
 
         /* Calculate the difference as (my_base - their_base) */
         diff = tmp_ptr[mca_btl_sm_component.my_smp_rank] - tmp_ptr[j];
-        mca_btl_sm_component.fifo[j]=
-            ( ompi_fifo_t *)( (char *)fifo_tmp[j]+diff);
+        mca_btl_sm_component.fifo[j] = (ompi_fifo_t*)((char*)fifo_tmp[j]+diff);
         mca_btl_sm_component.sm_offset[j] = diff;
     }
 
     /* initialize some of the free-lists */
-    if( !mca_btl_sm[0].btl_inited ) {
+    if( !mca_btl_sm.btl_inited ) {
         /* some initialization happens only the first time this routine
          * is called, i.e. when btl_inited is false */
 
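For illustration, not from the commit: this is the payoff of the precalculation. Once tmp_ptr[] holds every rank's own base address of the common segment, a pointer that is valid in a peer's address space becomes valid locally by adding diff = my_base - peer_base, which is exactly what sm_offset[] caches. A hypothetical helper:

#include <stddef.h>

/* translate a pointer published by a peer into our own address space;
 * my_base and peer_base are the two ranks' attach addresses of the same
 * shared memory segment (tmp_ptr[my_rank] and tmp_ptr[peer] above) */
static inline void *sm_translate(void *peer_ptr, char *my_base, char *peer_base)
{
    ptrdiff_t diff = my_base - peer_base;   /* cached as sm_offset[peer] */
    return (char *)peer_ptr + diff;
}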
@@ -587,18 +548,10 @@
             mca_btl_sm_component.sm_free_list_inc,
             NULL);
 
-        /* set up mca_btl_sm_component.list_smp_procs_same_base_addr */
-        mca_btl_sm_component.list_smp_procs_same_base_addr=(int *)
+        /* set up mca_btl_sm_component.list_smp_procs */
+        mca_btl_sm_component.list_smp_procs=(int *)
             malloc(mca_btl_sm_component.sm_max_procs*sizeof(int));
-        if( NULL == mca_btl_sm_component.list_smp_procs_same_base_addr ){
-            return_code=OMPI_ERR_OUT_OF_RESOURCE;
-            goto CLEANUP;
-        }
-
-        /* set up mca_btl_sm_component.list_smp_procs_different_base_addr */
-        mca_btl_sm_component.list_smp_procs_different_base_addr=(int *)
-            malloc(mca_btl_sm_component.sm_max_procs*sizeof(int));
-        if( NULL == mca_btl_sm_component.list_smp_procs_different_base_addr ){
+        if( NULL == mca_btl_sm_component.list_smp_procs ){
             return_code=OMPI_ERR_OUT_OF_RESOURCE;
             goto CLEANUP;
         }
@@ -610,49 +563,24 @@
     /* set connectivity */
     cnt=0;
     for(proc = 0 ; proc < (int32_t)nprocs ; proc++ ) {
 
         struct mca_btl_base_endpoint_t* peer = peers[proc];
         if(peer == NULL)
             continue;
 
-        tmp_ptr=(volatile char **)
-            ( (char *)mca_btl_sm_component.sm_ctl_header->
-            segment_header.base_shared_mem_segment +
-            (long)mca_btl_sm_component.sm_mpool->mpool_base(mca_btl_sm_component.sm_mpool));
-        same_sm_base=(tmp_ptr[peer->peer_smp_rank] ==
-            tmp_ptr[mca_btl_sm_component.my_smp_rank]);
-
         if( SM_CONNECTED == mca_btl_sm_component.sm_proc_connect[proc] ) {
-            if( same_sm_base ){
-
-                /* don't count if same process */
-                if( (mca_btl_sm_component.num_smp_procs+cnt ) ==
-                    mca_btl_sm_component.my_smp_rank) {
-                    cnt++;
-                    continue;
-                }
-                /* set up the list of local processes with the same base
-                 * shared memory virtual address as this process */
-                mca_btl_sm_component.list_smp_procs_same_base_addr
-                    [mca_btl_sm_component.num_smp_procs_same_base_addr]=
-                    cnt;
-                mca_btl_sm_component.num_smp_procs_same_base_addr++;
-                cnt++;
-                /* add this proc to shared memory accessability list */
-                return_code=ompi_bitmap_set_bit(reachability,proc);
-                if( OMPI_SUCCESS != return_code ){
-                    goto CLEANUP;
-                }
-            } else {
-                /* set up the list of local processes with the same base
-                 * shared memory virtual address as this process */
-                mca_btl_sm_component.list_smp_procs_different_base_addr
-                    [mca_btl_sm_component.num_smp_procs_different_base_addr]=
-                    cnt;
-                mca_btl_sm_component.num_smp_procs_different_base_addr++;
-                cnt++;
-                mca_btl_sm_component.sm_proc_connect[proc]=
-                    SM_CONNECTED_DIFFERENT_BASE_ADDR;
-            }
+            /* don't count if same process */
+            if( (mca_btl_sm_component.num_smp_procs+proc ) ==
+                    mca_btl_sm_component.my_smp_rank) {
+                continue;
+            }
+
+            mca_btl_sm_component.list_smp_procs
+                [mca_btl_sm_component.num_smp_procs + cnt] = proc;
+            cnt++;
+            /* add this proc to shared memory accessability list */
+            return_code=ompi_bitmap_set_bit(reachability,proc);
+            if( OMPI_SUCCESS != return_code ){
+                goto CLEANUP;
+            }
         }
     }
@@ -667,82 +595,12 @@
     /* update the local smp process count */
     mca_btl_sm_component.num_smp_procs+=n_local_procs;
 
-CLEANUP:
-    return return_code;
-}
-
-/* Note:: this routine assumes that mca_btl_sm_add_procs_same_base_addr
- * has already been called to set up data structures needed by this
- * routine */
-int mca_btl_sm_add_procs(
-    struct mca_btl_base_module_t* btl,
-    size_t nprocs,
-    struct ompi_proc_t **procs,
-    struct mca_btl_base_endpoint_t **peers,
-    ompi_bitmap_t* reachability)
-{
-    int return_code = OMPI_SUCCESS, tmp_cnt;
-    uint32_t proc, n_local_procs;
-
-    /* initializion */
-    for(proc=0 ; proc < nprocs ; proc++ ) {
-        peers[proc]=NULL;
-    }
-
-    /* figure out total number of local procs in current set */
-    tmp_cnt=0;
-    for(proc = 0 ; proc < nprocs ; proc++ ) {
-        if( (SM_CONNECTED_DIFFERENT_BASE_ADDR ==
-             mca_btl_sm_component.sm_proc_connect[proc]) ||
-            (SM_CONNECTED ==
-             mca_btl_sm_component.sm_proc_connect[proc]) ) {
-            tmp_cnt++;
-        }
-    }
-    /* set connectivity */
-    n_local_procs=0;
-    for(proc = 0 ; proc < nprocs ; proc++ ) {
-        /* Same base address base */
-        if (SM_CONNECTED == mca_btl_sm_component.sm_proc_connect[proc]) {
-            n_local_procs++;
-        }
-
-        /* Different base address case */
-        else if (SM_CONNECTED_DIFFERENT_BASE_ADDR ==
-                 mca_btl_sm_component.sm_proc_connect[proc]) {
-
-            /* add this proc to shared memory accessability list */
-            return_code=ompi_bitmap_set_bit(reachability,proc);
-            if( OMPI_SUCCESS != return_code ){
-                goto CLEANUP;
-            }
-
-            /* initialize the peers information */
-            peers[proc] = (struct mca_btl_base_endpoint_t*)malloc(sizeof(struct mca_btl_base_endpoint_t));
-            if( NULL == peers[proc] ){
-                return_code=OMPI_ERR_OUT_OF_RESOURCE;
-                goto CLEANUP;
-            }
-            peers[proc]->my_smp_rank=mca_btl_sm_component.my_smp_rank;
-            /* subtract tmp_cnt, since mca_btl_sm_add_procs_same_base_addr
-             * already added these into num_smp_procs */
-            peers[proc]->peer_smp_rank=n_local_procs+
-                mca_btl_sm_component.num_smp_procs-tmp_cnt;
-#if OMPI_ENABLE_PROGRESS_THREADS
-            peers[proc]->fifo_fd =
-                mca_btl_sm_component.sm_peers[peers[proc]->peer_smp_rank]->fifo_fd;
-#endif
-            n_local_procs++;
-        }
-    }
-
 CLEANUP:
     /* free local memory */
     if(mca_btl_sm_component.sm_proc_connect){
         free(mca_btl_sm_component.sm_proc_connect);
         mca_btl_sm_component.sm_proc_connect=NULL;
     }
 
     return return_code;
 }
@@ -111,21 +111,11 @@ struct mca_btl_sm_component_t {
     int *sm_proc_connect;   /* scratch array used by the 0'th btl to
                              * set indicate sm connectivty. Used by
                              * the 1'st btl */
-    uint32_t num_smp_procs; /**< current number of smp procs on this host */
-    int num_smp_procs_same_base_addr; /* number of procs with same
-                                         base shared memory virtual
-                                         address as this process */
-    int num_smp_procs_different_base_addr; /* number of procs with
-                                              different base shared memory
-                                              virtual address as this
-                                              process */
-    int *list_smp_procs_same_base_addr; /* number of procs with same
-                                           base shared memory virtual
-                                           address as this process */
-    int *list_smp_procs_different_base_addr; /* number of procs with different
-                                                base shared memory virtual
-                                                address as this process */
-    uint32_t my_smp_rank; /**< My SMP process rank. Used for accessing
+    int32_t num_smp_procs;  /**< current number of smp procs on this host */
+    int *list_smp_procs;    /* procs with same (index 0) and
+                               different (index 1) base shared memory virtual
+                               address as this process */
+    int32_t my_smp_rank;    /**< My SMP process rank. Used for accessing
                              * SMP specfic data structures. */
     ompi_free_list_t sm_frags1; /**< free list of sm first */
     ompi_free_list_t sm_frags2; /**< free list of sm second */
@@ -184,7 +174,7 @@ struct mca_btl_sm_t {
 };
 typedef struct mca_btl_sm_t mca_btl_sm_t;
 
-extern mca_btl_sm_t mca_btl_sm[2];
+extern mca_btl_sm_t mca_btl_sm;
 
 /**
  * Register a callback function that is called on error..
@@ -240,7 +240,6 @@ mca_btl_base_module_t** mca_btl_sm_component_init(
     bool enable_mpi_threads)
 {
     mca_btl_base_module_t **ptls = NULL;
-    int i;
 
     *num_ptls = 0;
 
@@ -270,44 +269,40 @@ mca_btl_base_module_t** mca_btl_sm_component_init(
 #endif
 
     /* allocate the Shared Memory PTL */
-    *num_ptls = 2;
+    *num_ptls = 1;
     ptls = (mca_btl_base_module_t**)malloc((*num_ptls)*sizeof(mca_btl_base_module_t*));
     if (NULL == ptls) {
         return NULL;
     }
 
     /* get pointer to the ptls */
-    ptls[0] = (mca_btl_base_module_t *)(&(mca_btl_sm[0]));
-    ptls[1] = (mca_btl_base_module_t *)(&(mca_btl_sm[1]));
+    ptls[0] = (mca_btl_base_module_t *)(&(mca_btl_sm));
 
     /* set scheduling parameters */
-    for( i=0 ; i < 2 ; i++ ) {
-        mca_btl_sm[i].super.btl_eager_limit=mca_btl_sm_component.eager_limit;
-        mca_btl_sm[i].super.btl_min_send_size=mca_btl_sm_component.max_frag_size;
-        mca_btl_sm[i].super.btl_max_send_size=mca_btl_sm_component.max_frag_size;
-        mca_btl_sm[i].super.btl_min_rdma_size=mca_btl_sm_component.max_frag_size;
-        mca_btl_sm[i].super.btl_max_rdma_size=mca_btl_sm_component.max_frag_size;
-        /* The order in which the SM modules are initialized is important as only
-         * the first one (the one using the mca_btl_sm_add_procs_same_base_addr)
-         * will setup all the memory for the internal structures (sm_proc_connect).
-         * Therefore, the order in which the two SM module will be after the
-         * selection is important. We have to make sure they get sorted in the
-         * correct order. The simplest way is to force the exclusivity of the
-         * second module to something lower than the exclusivity of the first one.
-         */
-        mca_btl_sm[i].super.btl_exclusivity = mca_btl_sm_component.sm_exclusivity - i;
-        mca_btl_sm[i].super.btl_latency = mca_btl_sm_component.sm_latency; /* lowest latency */
-        mca_btl_sm[i].super.btl_bandwidth = 900; /* not really used now since exclusivity is set to the highest value */
-    }
+    mca_btl_sm.super.btl_eager_limit=mca_btl_sm_component.eager_limit;
+    mca_btl_sm.super.btl_min_send_size=mca_btl_sm_component.max_frag_size;
+    mca_btl_sm.super.btl_max_send_size=mca_btl_sm_component.max_frag_size;
+    mca_btl_sm.super.btl_min_rdma_size=mca_btl_sm_component.max_frag_size;
+    mca_btl_sm.super.btl_max_rdma_size=mca_btl_sm_component.max_frag_size;
+    /* The order in which the SM modules are initialized is important as only
+     * the first one (the one using the mca_btl_sm_add_procs_same_base_addr)
+     * will setup all the memory for the internal structures (sm_proc_connect).
+     * Therefore, the order in which the two SM module will be after the
+     * selection is important. We have to make sure they get sorted in the
+     * correct order. The simplest way is to force the exclusivity of the
+     * second module to something lower than the exclusivity of the first one.
+     */
+    mca_btl_sm.super.btl_exclusivity = mca_btl_sm_component.sm_exclusivity;
+    mca_btl_sm.super.btl_latency = mca_btl_sm_component.sm_latency; /* lowest latency */
+    mca_btl_sm.super.btl_bandwidth = 900; /* not really used now since exclusivity is set to the highest value */
 
     /* initialize some PTL data */
     /* start with no SM procs */
     mca_btl_sm_component.num_smp_procs = 0;
-    mca_btl_sm_component.my_smp_rank = 0xFFFFFFFF; /* not defined */
+    mca_btl_sm_component.my_smp_rank = -1; /* not defined */
 
     /* set flag indicating ptl not inited */
-    mca_btl_sm[0].btl_inited=false;
-    mca_btl_sm[1].btl_inited=false;
+    mca_btl_sm.btl_inited=false;
 
     return ptls;
 }
@@ -344,7 +339,7 @@ int mca_btl_sm_component_progress(void)
     mca_btl_sm_hdr_t *hdr;
     int my_smp_rank=mca_btl_sm_component.my_smp_rank;
     int proc;
-    int rc = 0, btl = 0;
+    int rc = 0;
 
     /* send progress is made by the PML */
 
@@ -353,14 +348,9 @@
     */
 
     /* poll each fifo */
-
-    /* loop over fifo's - procs with same base shared memory
-     * virtual address as this process */
-    for( proc=0 ; proc < mca_btl_sm_component.num_smp_procs_same_base_addr
-            ; proc++ )
-    {
-        peer_smp_rank= mca_btl_sm_component.list_smp_procs_same_base_addr[proc];
-        fifo=&(mca_btl_sm_component.fifo[my_smp_rank][peer_smp_rank]);
+    for(proc = 0; proc < mca_btl_sm_component.num_smp_procs - 1; proc++) {
+        peer_smp_rank = mca_btl_sm_component.list_smp_procs[proc];
+        fifo = &(mca_btl_sm_component.fifo[my_smp_rank][peer_smp_rank]);
 
         /* if fifo is not yet setup - continue - not data has been sent*/
         if(OMPI_CB_FREE == fifo->tail){
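For illustration, not from the commit: the shape the merged progress loop now takes, with every local peer in one list and the address translation pushed into precalculated data. A toy version that uses a one-slot mailbox of segment offsets per peer instead of the real ompi_fifo_t machinery; every name here is invented:

#include <stddef.h>

struct peer_mailbox { volatile ptrdiff_t msg_off; };   /* 0 means empty */

/* poll every local peer once; returns the number of messages drained */
int poll_peers(struct peer_mailbox *boxes, const int *list_smp_procs,
               int num_peers, char *my_base)
{
    int drained = 0;
    for (int i = 0; i < num_peers; i++) {
        int peer = list_smp_procs[i];
        ptrdiff_t off = boxes[peer].msg_off;
        if (0 == off)
            continue;                  /* nothing pending from this peer */
        char *msg = my_base + off;     /* segment offset -> local pointer */
        (void)msg;                     /* ... dispatch the fragment ... */
        boxes[peer].msg_off = 0;
        drained++;
    }
    return drained;
}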
@@ -368,124 +358,31 @@
         }
 
         /* aquire thread lock */
-        if( opal_using_threads() ) {
+        if(opal_using_threads()) {
             opal_atomic_lock(fifo->tail_lock);
         }
 
-        /* get pointer - pass in offset to change queue pointer
-         * addressing from that of the sender. In this case, we know
-         * that we have the same base address as the sender, so no
-         * translation is necessary when accessing the fifo. Hence,
-         * we use the _same_base_addr varient. */
         hdr = (mca_btl_sm_hdr_t *)ompi_fifo_read_from_tail(fifo);
 
         /* release thread lock */
-        if( opal_using_threads() ) {
+        if(opal_using_threads()) {
             opal_atomic_unlock(fifo->tail_lock);
         }
 
-        if( OMPI_CB_FREE == hdr ) {
+        if(OMPI_CB_FREE == hdr) {
            continue;
         }
 
-        /* dispatch fragment by type */
-        switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) {
-            case MCA_BTL_SM_FRAG_ACK:
-            {
-                int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK;
-                frag = (mca_btl_sm_frag_t *)((uintptr_t)hdr &
-                    (~(MCA_BTL_SM_FRAG_TYPE_MASK |
-                    MCA_BTL_SM_FRAG_STATUS_MASK)));
-                /* completion callback */
-                frag->base.des_cbfunc(&mca_btl_sm[0].super, frag->endpoint,
-                    &frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
-                break;
-            }
-            case MCA_BTL_SM_FRAG_SEND:
-            {
-                /* recv upcall */
-                mca_btl_sm_recv_reg_t* reg = mca_btl_sm[0].sm_reg + hdr->tag;
-
-                MCA_BTL_SM_FRAG_ALLOC(frag, rc);
-                frag->segment.seg_addr.pval = ((char*)hdr) +
-                    sizeof(mca_btl_sm_hdr_t);
-                frag->segment.seg_len = hdr->len;
-                reg->cbfunc(&mca_btl_sm[0].super,hdr->tag,&frag->base,reg->cbdata);
-                MCA_BTL_SM_FRAG_RETURN(frag);
-                MCA_BTL_SM_FIFO_WRITE(
-                    mca_btl_sm_component.sm_peers[peer_smp_rank],
-                    my_smp_rank, peer_smp_rank, hdr->frag, rc);
-                if(OMPI_SUCCESS != rc)
-                    goto err;
-                break;
-            }
-            default:
-            {
-                /* unknown */
-                hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag |
-                    MCA_BTL_SM_FRAG_STATUS_MASK);
-                MCA_BTL_SM_FIFO_WRITE(
-                    mca_btl_sm_component.sm_peers[peer_smp_rank],
-                    my_smp_rank, peer_smp_rank, hdr, rc);
-                if(OMPI_SUCCESS != rc)
-                    goto err;
-                break;
-            }
-        }
-        rc++;
-    } /* end peer_local_smp_rank loop */
-
-
-    btl = 1;
-    /* loop over fifo's - procs with different base shared memory
-     * virtual address as this process */
-    for( proc=0 ; proc < mca_btl_sm_component.num_smp_procs_different_base_addr
-            ; proc++ )
-    {
-        peer_smp_rank= mca_btl_sm_component.list_smp_procs_different_base_addr[proc];
-        fifo=&(mca_btl_sm_component.fifo[my_smp_rank][peer_smp_rank]);
-
-        /* if fifo is not yet setup - continue - not data has been sent*/
-        if(OMPI_CB_FREE == fifo->tail){
-            continue;
-        }
-
-        /* aquire thread lock */
-        if( opal_using_threads() ) {
-            opal_atomic_lock(fifo->tail_lock);
-        }
-
-        /* get pointer - pass in offset to change queue pointer
-         * addressing from that of the sender. In this case, we do
-         * *not* have the same base address as the sender, so we must
-         * translate every access into the fifo to be relevant to our
-         * memory space. Hence, we do *not* use the _same_base_addr
-         * variant. */
-        hdr = (mca_btl_sm_hdr_t *)ompi_fifo_read_from_tail( fifo );
-
-        if( OMPI_CB_FREE == hdr ) {
-            /* release thread lock */
-            if( opal_using_threads() ) {
-                opal_atomic_unlock(fifo->tail_lock);
-            }
-            continue;
-        }
-
-        /* release thread lock */
-        if( opal_using_threads() ) {
-            opal_atomic_unlock(fifo->tail_lock);
-        }
-
         /* dispatch fragment by type */
         switch(((uintptr_t)hdr) & MCA_BTL_SM_FRAG_TYPE_MASK) {
             case MCA_BTL_SM_FRAG_ACK:
             {
                 int status = (uintptr_t)hdr & MCA_BTL_SM_FRAG_STATUS_MASK;
                 frag = (mca_btl_sm_frag_t *)((char*)((uintptr_t)hdr &
                     (~(MCA_BTL_SM_FRAG_TYPE_MASK |
                     MCA_BTL_SM_FRAG_STATUS_MASK))));
                 /* completion callback */
-                frag->base.des_cbfunc(&mca_btl_sm[1].super, frag->endpoint,
+                frag->base.des_cbfunc(&mca_btl_sm.super, frag->endpoint,
                     &frag->base, status?OMPI_ERROR:OMPI_SUCCESS);
                 break;
             }
@@ -494,15 +391,16 @@
                 mca_btl_sm_recv_reg_t* reg;
                 /* change the address from address relative to the shared
                  * memory address, to a true virtual address */
-                hdr = (mca_btl_sm_hdr_t *)( (char *)hdr +
+                hdr = (mca_btl_sm_hdr_t *)((char *)hdr +
                     mca_btl_sm_component.sm_offset[peer_smp_rank]);
                 /* recv upcall */
-                reg = mca_btl_sm[1].sm_reg + hdr->tag;
+                reg = mca_btl_sm.sm_reg + hdr->tag;
                 MCA_BTL_SM_FRAG_ALLOC(frag, rc);
                 frag->segment.seg_addr.pval = ((char*)hdr) +
                     sizeof(mca_btl_sm_hdr_t);
                 frag->segment.seg_len = hdr->len;
-                reg->cbfunc(&mca_btl_sm[1].super,hdr->tag,&frag->base,reg->cbdata);
+                reg->cbfunc(&mca_btl_sm.super, hdr->tag, &frag->base,
+                            reg->cbdata);
                 MCA_BTL_SM_FRAG_RETURN(frag);
                 MCA_BTL_SM_FIFO_WRITE(
                     mca_btl_sm_component.sm_peers[peer_smp_rank],
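For illustration, not from the commit: the receive-side fixup above in isolation. The header pointer read from the fifo is an address in the sender's mapping; adding the per-peer delta computed during add_procs makes it dereferenceable locally. sm_offset here mirrors mca_btl_sm_component.sm_offset:

#include <stddef.h>

extern ptrdiff_t sm_offset[];   /* sm_offset[p] = my_base - base_of_rank_p */

static inline void *to_local(void *sender_ptr, int peer_smp_rank)
{
    return (char *)sender_ptr + sm_offset[peer_smp_rank];
}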
@@ -512,7 +410,6 @@
                 break;
             }
             default:
-            {
                 /* unknown */
                 hdr = (mca_btl_sm_hdr_t*)((uintptr_t)hdr->frag |
                     MCA_BTL_SM_FRAG_STATUS_MASK);
@@ -522,13 +419,12 @@
                 if(OMPI_SUCCESS != rc)
                     goto err;
                 break;
-            }
         }
         rc++;
-    } /* end peer_local_smp_rank loop */
+    }
     return rc;
 err:
     BTL_ERROR(("SM faild to send message due to shortage of shared memory.\n"));
-    mca_btl_sm[btl].error_cb(&mca_btl_sm[btl].super, MCA_BTL_ERROR_FLAGS_FATAL);
+    mca_btl_sm.error_cb(&mca_btl_sm.super, MCA_BTL_ERROR_FLAGS_FATAL);
     return rc;
 }