From 58b325b03faca0ae8b7761ebe2ef6574d6888308 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Thu, 1 Feb 2007 17:18:35 +0000 Subject: [PATCH] Two changes to improve the sm situation with spawn: * have the mpool size be based on MCW, not num procs in other jobs we know about. Solves the problem of the spawned job having a much bigger than needed sm file * Can't assume that "me" is in the list of procs passed to add_procs, so need to use slightly different logic and not go through all of add_procs unless there's a proc in my job that isn't me. This seems to greatly improve the situation, although there still seems to be more of a slowdown through MPI_INIT for the children (if there is more than one child) than MPI_INIT for the parent if there are 'n' children compared to 'n' parents. Hopefully that made sense ;) This commit was SVN r13417. --- ompi/mca/btl/sm/btl_sm.c | 21 +++++++++++++-------- ompi/mca/mpool/sm/mpool_sm_component.c | 4 +++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/ompi/mca/btl/sm/btl_sm.c b/ompi/mca/btl/sm/btl_sm.c index 4bb05c4cd3..2d23205b78 100644 --- a/ompi/mca/btl/sm/btl_sm.c +++ b/ompi/mca/btl/sm/btl_sm.c @@ -163,6 +163,7 @@ int mca_btl_sm_add_procs_same_base_addr( ptrdiff_t diff; volatile char **tmp_ptr; volatile int *tmp_int_ptr; + bool have_connected_peer = false; /* initializion */ for( i = 0 ; i < nprocs ; i++ ) { @@ -201,20 +202,24 @@ int mca_btl_sm_add_procs_same_base_addr( #endif struct mca_btl_base_endpoint_t *peer; - /* check to see if this is me */ - if( my_proc == procs[proc] ) { - mca_btl_sm_component.my_smp_rank = n_local_procs; - } - /* check to see if this proc can be reached via shmem (i.e., if they're on my local host and in my job) */ - else if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid || + if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid || 0 == (procs[proc]->proc_flags & OMPI_PROC_FLAG_LOCAL)) { continue; } /* If we got here, the proc is reachable via sm. 
So initialize the peers information */ + + /* check to see if this is me */ + if( my_proc == procs[proc] ) { + mca_btl_sm_component.my_smp_rank = n_local_procs; + } else { + /* we have someone to talk to */ + have_connected_peer = true; + } + peer = peers[proc] = (struct mca_btl_base_endpoint_t*)malloc(sizeof(struct mca_btl_base_endpoint_t)); if( NULL == peer ){ return_code=OMPI_ERR_OUT_OF_RESOURCE; @@ -236,8 +241,8 @@ int mca_btl_sm_add_procs_same_base_addr( mca_btl_sm_component.sm_proc_connect[proc]=SM_CONNECTED; } - /* There is always at least a local proc (myself). */ - if( n_local_procs == 1) { + /* jump out if there's not someone we can talk to */ + if (!have_connected_peer) { return_code = OMPI_SUCCESS; goto CLEANUP; } diff --git a/ompi/mca/mpool/sm/mpool_sm_component.c b/ompi/mca/mpool/sm/mpool_sm_component.c index 9f11e224ec..808c5a3da9 100644 --- a/ompi/mca/mpool/sm/mpool_sm_component.c +++ b/ompi/mca/mpool/sm/mpool_sm_component.c @@ -136,7 +136,9 @@ static mca_mpool_base_module_t* mca_mpool_sm_init( mca_base_param_lookup_int(min_size_param, &min_size); mca_base_param_lookup_int(peer_size_param, &peer_size); - procs = ompi_proc_all(&num_all_procs); + /* README: this needs to change if procs in different jobs (even + spawned ones) are to talk using shared memory */ + procs = ompi_proc_world(&num_all_procs); for (i = 0 ; i < num_all_procs ; ++i) { if (procs[i]->proc_flags & OMPI_PROC_FLAG_LOCAL) { num_local_procs++;