From 428add390ead3f467a0613313911e9b57ff64ed6 Mon Sep 17 00:00:00 2001 From: Alex Mikheev Date: Wed, 3 Dec 2014 15:36:45 +0200 Subject: [PATCH] OSHMEM: spml ikrit: add skew to connect/disconnect Each pe connects/disconnects starting from itself instead of pe=0. This will distribute network traffic in a more friendly way. --- oshmem/mca/spml/ikrit/spml_ikrit.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/oshmem/mca/spml/ikrit/spml_ikrit.c b/oshmem/mca/spml/ikrit/spml_ikrit.c index 555fab46da..9bb3c34f08 100644 --- a/oshmem/mca/spml/ikrit/spml_ikrit.c +++ b/oshmem/mca/spml/ikrit/spml_ikrit.c @@ -341,8 +341,9 @@ OBJ_CLASS_INSTANCE( mxm_peer_t, int mca_spml_ikrit_del_procs(oshmem_proc_t** procs, size_t nprocs) { - size_t i; + size_t i, n; opal_list_item_t *item; + int my_rank = oshmem_my_proc_id(); #if MXM_API >= MXM_VERSION(2,0) if (mca_spml_ikrit.bulk_disconnect) { @@ -354,7 +355,8 @@ int mca_spml_ikrit_del_procs(oshmem_proc_t** procs, size_t nprocs) }; OBJ_DESTRUCT(&mca_spml_ikrit.active_peers); - for (i = 0; i < nprocs; i++) { + for (n = 0; n < nprocs; n++) { + i = (my_rank + n) % nprocs; if (mca_spml_ikrit.mxm_peers[i]->mxm_conn) { mxm_ep_disconnect(mca_spml_ikrit.mxm_peers[i]->mxm_conn); } @@ -384,7 +386,7 @@ int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs) size_t mxm_addr_len = MXM_MAX_ADDR_LEN; #endif mxm_error_t err; - size_t i; + size_t i, n; int rc = OSHMEM_ERROR; oshmem_proc_t *proc_self; int my_rank = oshmem_my_proc_id(); @@ -455,8 +457,11 @@ int mca_spml_ikrit_add_procs(oshmem_proc_t** procs, size_t nprocs) opal_progress_register(spml_ikrit_progress); /* Get the EP connection requests for all the processes from modex */ - for (i = 0; i < nprocs; ++i) { + for (n = 0; n < nprocs; ++n) { + /* mxm 2.0 keeps its connections on a list. 
Make sure + * that list has a different order on every rank */ + i = (my_rank + n) % nprocs; mca_spml_ikrit.mxm_peers[i] = OBJ_NEW(mxm_peer_t); if (NULL == mca_spml_ikrit.mxm_peers[i]) { rc = OSHMEM_ERR_OUT_OF_RESOURCE;