1
1

OSHMEM: MXM2: a2a perf improvement on large scale

Allow only a limited number of connections to have 'puts'
that do not require a remote completion ack. That will
greatly improve performance of shmem_fence()/shmem_quiet()
and shmem_barrier() when there are many active connections.

fixed by Alex, reviewed by Miked

Refs trac:3763

This commit was SVN r30573.

The following Trac tickets were found above:
  Ticket 3763 --> https://svn.open-mpi.org/trac/ompi/ticket/3763
Этот коммит содержится в:
Mike Dubman 2014-02-06 08:42:45 +00:00
родитель 27a763c86c
Коммит 28949efcaf
3 изменённых файлов: 11 добавлений и 0 удалений

Просмотреть файл

@ -1055,6 +1055,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
}
#else
if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER ||
opal_list_get_size(&mca_spml_ikrit.active_peers) > mca_spml_ikrit.unsync_conn_max ||
(mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) {
put_req->mxm_req.flags = 0;
need_progress = 1;

Просмотреть файл

@ -94,6 +94,9 @@ struct mca_spml_ikrit_t {
int n_relays; /* number of procs/node serving as relays */
char *mxm_tls;
#if MXM_API >= MXM_VERSION(2,0)
int unsync_conn_max;
#endif
};
typedef struct mca_spml_ikrit_t mca_spml_ikrit_t;

Просмотреть файл

@ -125,6 +125,13 @@ static int mca_spml_ikrit_component_register(void)
0,
#endif
"[integer] Minimal allowed job's NP to activate ikrit");
#if MXM_API >= MXM_VERSION(2,0)
mca_spml_ikrit.unsync_conn_max =
mca_spml_ikrit_param_register_int("unsync_conn_max",
8,
"[integer] Max number of connections that do not require notification of PUT operation remote completion. Increasing this number improves efficiency of p2p communication but increases overhead of shmem_fence/shmem_quiet/shmem_barrier");
#endif
if (oshmem_num_procs() < np) {
SPML_VERBOSE(1,
"Not enough ranks (%d<%d), disqualifying spml/ikrit",