diff --git a/ompi/mca/btl/mx/btl_mx.c b/ompi/mca/btl/mx/btl_mx.c index 6fed175167..6dbd47ef3a 100644 --- a/ompi/mca/btl/mx/btl_mx.c +++ b/ompi/mca/btl/mx/btl_mx.c @@ -70,9 +70,18 @@ int mca_btl_mx_add_procs( ompi_bitmap_t* reachable) { mca_btl_mx_module_t* mx_btl = (mca_btl_mx_module_t*)btl; - int i, rc; + int i, rc, index; - for(i = 0; i < (int) nprocs; i++) { + /* MX seems to not be very scalable if all the processes start to connect at + * the same time to the same destination. We can help it here if we first compute + * our rank in the list, and then we set up the connections starting with + * the next processor in the list in a round-robin fashion. + */ + for( i = 0; i < (int)nprocs; i++ ) { + if( ompi_procs[i] == ompi_proc_local_proc ) + break; + } + for( i = i % nprocs, index = 0; index < (int) nprocs; index++, i = (i + 1) % nprocs ) { struct ompi_proc_t* ompi_proc = ompi_procs[i]; mca_btl_mx_proc_t* mx_proc;