Two changes to improve the sm situation with spawn:

* Have the mpool size be based on MPI_COMM_WORLD (MCW), not on the number of procs in other jobs we know about. This solves the problem of the spawned job having a much bigger sm file than it needs.
* We can't assume that "me" is in the list of procs passed to add_procs, so we need slightly different logic: don't go through all of add_procs unless there is a proc in my job that isn't me.

This seems to greatly improve the situation, although there still seems to be more of a slowdown through MPI_INIT for the children (if there is more than one child) than through MPI_INIT for the parents; that is, 'n' children are slower than 'n' parents. Hopefully that made sense ;)

This commit was SVN r13417.
2007-02-01 17:18:35 +00:00 · 2007-02-01 17:18:35 +00:00 · 58b325b03f
--- a/ompi/mca/btl/sm/btl_sm.c
+++ b/ompi/mca/btl/sm/btl_sm.c
@ -163,6 +163,7 @@ int mca_btl_sm_add_procs_same_base_addr(
    ptrdiff_t diff;
    volatile char **tmp_ptr;
    volatile int *tmp_int_ptr;
+    bool have_connected_peer = false;

    /* initializion */
    for( i = 0 ; i < nprocs ; i++ ) {
@ -201,20 +202,24 @@ int mca_btl_sm_add_procs_same_base_addr(
 #endif
        struct mca_btl_base_endpoint_t *peer;

-        /* check to see if this is me */
-        if( my_proc == procs[proc] ) {
-            mca_btl_sm_component.my_smp_rank = n_local_procs;
-        }
-
        /* check to see if this proc can be reached via shmem (i.e.,
           if they're on my local host and in my job) */
-        else if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ||
+        if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ||
                 0 == (procs[proc]->proc_flags & OMPI_PROC_FLAG_LOCAL)) {
            continue;
        }

        /* If we got here, the proc is reachable via sm.  So
           initialize the peers information */
+
+        /* check to see if this is me */
+        if( my_proc == procs[proc] ) {
+            mca_btl_sm_component.my_smp_rank = n_local_procs;
+        } else {
+            /* we have someone to talk to */
+            have_connected_peer = true;
+        }
+
        peer = peers[proc] = (struct mca_btl_base_endpoint_t*)malloc(sizeof(struct mca_btl_base_endpoint_t));
        if( NULL == peer ){
            return_code=OMPI_ERR_OUT_OF_RESOURCE;
@ -236,8 +241,8 @@ int mca_btl_sm_add_procs_same_base_addr(
        mca_btl_sm_component.sm_proc_connect[proc]=SM_CONNECTED;
    }

-    /* There is always at least a local proc (myself). */
-    if( n_local_procs == 1) {
+    /* jump out if there's not someone we can talk to */
+    if (!have_connected_peer) {
        return_code = OMPI_SUCCESS;
        goto CLEANUP;
    }
--- a/ompi/mca/mpool/sm/mpool_sm_component.c
+++ b/ompi/mca/mpool/sm/mpool_sm_component.c
@ -136,7 +136,9 @@ static mca_mpool_base_module_t* mca_mpool_sm_init(
    mca_base_param_lookup_int(min_size_param, &min_size);
    mca_base_param_lookup_int(peer_size_param, &peer_size);

-    procs = ompi_proc_all(&num_all_procs);
+    /* README: this needs to change if procs in different jobs (even
+       spawned ones) are to talk using shared memory */
+    procs = ompi_proc_world(&num_all_procs);
    for (i = 0 ; i < num_all_procs ; ++i) {
        if (procs[i]->proc_flags & OMPI_PROC_FLAG_LOCAL) {
            num_local_procs++;