diff --git a/src/mca/mpool/sm/mpool_sm_mmap.c b/src/mca/mpool/sm/mpool_sm_mmap.c index 6a1de50207..15e63e6d76 100644 --- a/src/mca/mpool/sm/mpool_sm_mmap.c +++ b/src/mca/mpool/sm/mpool_sm_mmap.c @@ -11,6 +11,7 @@ #include "include/constants.h" #include "util/output.h" #include "util/sys_info.h" +#include "util/proc_info.h" #include "mca/pcm/pcm.h" #include "mpool_sm.h" #include "mpool_sm_mmap.h" @@ -35,7 +36,8 @@ static mca_mpool_sm_mmap_t* mca_mpool_sm_mmap_open(char* path) struct timespec ts; fd = open(path, O_CREAT|O_RDWR, 0000); if(fd < 0 && errno != EACCES) { - ompi_output(0, "mca_ptl_sm_mmap_open: open failed with errno=%d\n", errno); + ompi_output(0, + "mca_ptl_sm_mmap_open: open %s failed with errno=%d\n", path, errno); return NULL; } ts.tv_sec = 0; @@ -72,12 +74,18 @@ mca_mpool_sm_mmap_t* mca_mpool_sm_mmap_init(size_t size) mca_mpool_sm_mmap_t* map; char path[PATH_MAX]; - sprintf(path, "%s/mmap.%s", ompi_system_info.session_dir, ompi_system_info.nodename); + sprintf(path, "%s/mmap.%s", ompi_process_info.job_session_dir, ompi_system_info.nodename); + /* debug */ + fprintf(stderr," open %s \n",path); + fflush(stderr); + /* end debug */ fd = open(path, O_CREAT|O_RDWR, 0000); if(fd < 0) { if(errno == EACCES) return mca_mpool_sm_mmap_open(path); - ompi_output(0, "mca_mpool_sm_mmap_init: open failed with errno=%d\n", errno); + ompi_output(0, + "mca_mpool_sm_mmap_init: open %s failed with errno=%d\n", + path,errno); return NULL; } diff --git a/src/mca/ptl/sm/src/ptl_sm.c b/src/mca/ptl/sm/src/ptl_sm.c index 05ca82cb9b..067f81275c 100644 --- a/src/mca/ptl/sm/src/ptl_sm.c +++ b/src/mca/ptl/sm/src/ptl_sm.c @@ -15,6 +15,7 @@ #include "mca/base/mca_base_module_exchange.h" #include "ptl_sm.h" #include "util/sys_info.h" +#include "mca/ptl/sm/src/ptl_sm_peer.h" mca_ptl_sm_t mca_ptl_sm = { @@ -46,64 +47,115 @@ int mca_ptl_sm_add_procs( struct mca_ptl_base_peer_t **peers, ompi_bitmap_t* reachability) { - int proc,rc; - bool same_host; - size_t size,len,my_len; + int i,proc,my_smp_rank,return_code=OMPI_SUCCESS; + size_t size,len,my_len,n_local_procs; mca_ptl_sm_exchange_t **sm_proc_info; ompi_proc_t* my_proc; /* pointer to caller's proc structure */ + mca_ptl_sm_t *ptl_sm; + + /* initializion */ + for(i=0 ; i < nprocs ; i++ ) { + peers[i]=NULL; + } + ptl_sm=(mca_ptl_sm_t *)ptl; /* allocate array to hold setup shared memory from all * other procs */ sm_proc_info=(mca_ptl_sm_exchange_t **) malloc(nprocs*sizeof(mca_ptl_sm_exchange_t *)); if( NULL == sm_proc_info ){ - rc=OMPI_ERR_OUT_OF_RESOURCE; + return_code=OMPI_ERR_OUT_OF_RESOURCE; goto CLEANUP; } /* get pointer to my proc structure */ my_proc=ompi_proc_local(); if( NULL == my_proc ) { - rc=OMPI_ERR_OUT_OF_RESOURCE; + return_code=OMPI_ERR_OUT_OF_RESOURCE; goto CLEANUP; } my_len=strlen(ompi_system_info.nodename); - /* get unique host identifier for each process in the list */ + /* Get unique host identifier for each process in the list, + * and idetify procs that are on this host. Add procs on this + * host to shared memory reachbility list. Also, get number + * of local procs in the prcs list. */ + n_local_procs=0; for( proc=0 ; proc < nprocs; proc++ ) { /* don't compare with self */ if( my_proc == procs[proc] ) { + ptl_sm->my_smp_rank=n_local_procs; + n_local_procs++; continue; } - rc = mca_base_modex_recv( + return_code = mca_base_modex_recv( &mca_ptl_sm_module.super.ptlm_version, procs[proc], (void**)(&(sm_proc_info[proc])), &size); - if(rc != OMPI_SUCCESS) { - ompi_output(0, "mca_ptl_sm_add_procs: mca_base_modex_recv: failed with return value=%d", rc); + if(return_code != OMPI_SUCCESS) { + ompi_output(0, "mca_ptl_sm_add_procs: mca_base_modex_recv: failed with return value=%d", return_code); goto CLEANUP; } + /* for zero length, just continue - comparison is meaningless*/ if( 0 >= size ) { continue; } + /* check to see if this proc is on my host */ len=strlen((char *)(sm_proc_info[proc])); - same_host=false; if( len == my_len ) { if( 0 == strncmp(ompi_system_info.nodename, - (char *)(sm_proc_info[proc]),len) ) { - same_host=true; + (char *)(sm_proc_info[proc]),len) ) { + + /* initialize the peers information */ + peers[proc]=malloc(sizeof(struct mca_ptl_base_peer_t)); + if( NULL == peers[proc] ){ + return_code=OMPI_ERR_OUT_OF_RESOURCE; + goto CLEANUP; + } + peers[proc]->peer_smp_rank=n_local_procs+ + ptl_sm->num_smp_procs; + n_local_procs++; + + /* add this proc to shared memory accessability list */ + return_code=ompi_bitmap_set_bit(reachability,proc); + if( OMPI_SUCCESS != return_code ){ + goto CLEANUP; + } } } - /* add this proc to shared memory accessability list */ - rc=ompi_bitmap_set_bit(reachability,proc); - if( OMPI_SUCCESS != rc ){ - goto CLEANUP; - } - } + /* make sure that my_smp_rank has been defined */ + if(-1 == ptl_sm->my_smp_rank){ + return_code=OMPI_ERROR; + goto CLEANUP; + } + + /* set local proc's smp rank in the peers structure for + * rapid access */ + for( proc=0 ; proc < nprocs; proc++ ) { + if(NULL != peers[proc] ) { + peers[proc]->my_smp_rank=ptl_sm->my_smp_rank; + } + } + + /* Allocate a fixed size pointer array for the 2-D Shared memory queues. + * Excess slots will be allocated for future growth. One could + * make this array growable, but then one would need to uses mutexes + * for any access to these queues to ensure data consistancy when + * the array is grown */ + + /* Note: Need to make sure that proc 0 initializes control + * structures before any of the other procs can progress */ + + /* Initizlize queue data structures + * - proc with lowest local rank does this + * - all the rest of the procs block until the queues are + * initialized + * - initial queue size is zero */ + /* free local memory */ if(sm_proc_info){ /* free the memory allocated by mca_base_modex_recv */ @@ -115,14 +167,15 @@ int mca_ptl_sm_add_procs( free(sm_proc_info); } - return OMPI_SUCCESS; + /* update the local smp process count */ + ptl_sm->num_smp_procs+=n_local_procs; CLEANUP: if(sm_proc_info){ free(sm_proc_info); } - return rc; + return return_code; } diff --git a/src/mca/ptl/sm/src/ptl_sm.h b/src/mca/ptl/sm/src/ptl_sm.h index 6ee7eb883e..b3afc994a5 100644 --- a/src/mca/ptl/sm/src/ptl_sm.h +++ b/src/mca/ptl/sm/src/ptl_sm.h @@ -82,6 +82,10 @@ extern int mca_ptl_sm_module_progress( */ struct mca_ptl_sm_t { mca_ptl_t super; /**< base PTL interface */ + int num_smp_procs; /**< current number of smp procs on this + host */ + int my_smp_rank; /**< My SMP process rank. Used for accessing + * SMP specfic data structures. */ }; typedef struct mca_ptl_sm_t mca_ptl_sm_t; diff --git a/src/mca/ptl/sm/src/ptl_sm_module.c b/src/mca/ptl/sm/src/ptl_sm_module.c index d9d63ecccb..c00e13f259 100644 --- a/src/mca/ptl/sm/src/ptl_sm_module.c +++ b/src/mca/ptl/sm/src/ptl_sm_module.c @@ -8,7 +8,7 @@ #include #include -#include "constants.h" +#include "include/constants.h" #include "event/event.h" #include "util/if.h" #include "util/argv.h" @@ -184,12 +184,19 @@ mca_ptl_t** mca_ptl_sm_module_init( if(mca_ptl_sm_module_exchange() != OMPI_SUCCESS) return 0; + /* allocate the Shared Memory PTL. Only one is being allocated */ ptls = malloc(sizeof(mca_ptl_t*)); if(NULL == ptls) return NULL; *ptls = &mca_ptl_sm.super; *num_ptls = 1; + + /* initialize some PTL data */ + /* start with no SM procs */ + mca_ptl_sm.num_smp_procs=0; + mca_ptl_sm.my_smp_rank=-1; + return ptls; } diff --git a/src/mca/ptl/sm/src/ptl_sm_peer.h b/src/mca/ptl/sm/src/ptl_sm_peer.h new file mode 100644 index 0000000000..9b296e9acc --- /dev/null +++ b/src/mca/ptl/sm/src/ptl_sm_peer.h @@ -0,0 +1,25 @@ +/* + * $HEADER$ + */ +/** + * @file + */ +#ifndef MCA_PTL_SM_PEER_H +#define MCA_PTL_SM_PEER_H + +/** + * An abstraction that represents a connection to a peer process. + * An instance of mca_ptl_base_peer_t is associated w/ each process + * and PTL pair at startup. + */ + +struct mca_ptl_base_peer_t { + int my_smp_rank; /**< My SMP process rank. Used for accessing + * SMP specfic data structures. */ + int peer_smp_rank; /**< My peer's SMP process rank. Used for accessing + * SMP specfic data structures. */ + +}; + +#endif + diff --git a/src/mca/ptl/sm/src/ptl_sm_sendfrag.c b/src/mca/ptl/sm/src/ptl_sm_sendfrag.c index cfbe848005..023001222d 100644 --- a/src/mca/ptl/sm/src/ptl_sm_sendfrag.c +++ b/src/mca/ptl/sm/src/ptl_sm_sendfrag.c @@ -4,7 +4,7 @@ #include #include #include -#include "types.h" +#include "include/types.h" #include "datatype/datatype.h" #include "ptl_sm.h" #include "ptl_sm_sendfrag.h" diff --git a/src/mca/ptl/sm/src/ptl_sm_sendreq.c b/src/mca/ptl/sm/src/ptl_sm_sendreq.c index 967ca4ffdf..5e414eccc4 100644 --- a/src/mca/ptl/sm/src/ptl_sm_sendreq.c +++ b/src/mca/ptl/sm/src/ptl_sm_sendreq.c @@ -4,7 +4,7 @@ #include #include #include -#include "types.h" +#include "include/types.h" #include "mca/pml/base/pml_base_sendreq.h" #include "ptl_sm.h" #include "ptl_sm_sendreq.h"