OSHMEM: MXM2: a2a perf improvement on large scale
Allow only a limited number of connections to have 'puts' that do not require a remote completion ack. That will greatly improve the performance of shmem_fence()/shmem_quiet() and shmem_barrier() when there are many active connections. Fixed by Alex, reviewed by Miked. Refs trac:3763. This commit was SVN r30573. The following Trac tickets were found above: Ticket 3763 --> https://svn.open-mpi.org/trac/ompi/ticket/3763
Этот коммит содержится в:
родитель
27a763c86c
Коммит
28949efcaf
@ -1055,6 +1055,7 @@ static inline int mca_spml_ikrit_put_internal(void* dst_addr,
|
||||
}
|
||||
#else
|
||||
if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER ||
|
||||
opal_list_get_size(&mca_spml_ikrit.active_peers) > mca_spml_ikrit.unsync_conn_max ||
|
||||
(mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) {
|
||||
put_req->mxm_req.flags = 0;
|
||||
need_progress = 1;
|
||||
|
@ -94,6 +94,9 @@ struct mca_spml_ikrit_t {
|
||||
int n_relays; /* number of procs/node serving as relays */
|
||||
|
||||
char *mxm_tls;
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
int unsync_conn_max;
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef struct mca_spml_ikrit_t mca_spml_ikrit_t;
|
||||
|
@ -125,6 +125,13 @@ static int mca_spml_ikrit_component_register(void)
|
||||
0,
|
||||
#endif
|
||||
"[integer] Minimal allowed job's NP to activate ikrit");
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
mca_spml_ikrit.unsync_conn_max =
|
||||
mca_spml_ikrit_param_register_int("unsync_conn_max",
|
||||
8,
|
||||
"[integer] Max number of connections that do not require notification of PUT operation remote completion. Increasing this number improves efficiency of p2p communication but increases overhead of shmem_fence/shmem_quiet/shmem_barrier");
|
||||
#endif
|
||||
|
||||
if (oshmem_num_procs() < np) {
|
||||
SPML_VERBOSE(1,
|
||||
"Not enough ranks (%d<%d), disqualifying spml/ikrit",
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user