* Add the ability to try a couple of different collectives for fence
synchronization, to see which gives the best performance. This commit was SVN r9314.
Этот коммит содержится в:
родитель
8a9e76dfa3
Коммит
234adb292b
@ -109,6 +109,11 @@ struct ompi_osc_pt2pt_module_t {
|
||||
/* an array of <sizeof(p2p_comm)> ints, each containing the value
|
||||
1. */
|
||||
int *p2p_fence_coll_counts;
|
||||
/* an array of <sizeof(p2p_comm)> shorts, for use in experimenting
|
||||
with different synchronization costs */
|
||||
short *p2p_fence_coll_results;
|
||||
|
||||
enum { OSC_SYNC_REDUCE_SCATTER, OSC_SYNC_ALLREDUCE, OSC_SYNC_ALLTOALL } p2p_fence_sync_type;
|
||||
|
||||
/* ********************* PWSC data ************************ */
|
||||
|
||||
@ -163,9 +168,6 @@ extern "C" {
|
||||
/*
|
||||
* Component functions
|
||||
*/
|
||||
int ompi_osc_pt2pt_component_open(void);
|
||||
|
||||
int ompi_osc_pt2pt_component_close(void);
|
||||
|
||||
int ompi_osc_pt2pt_component_init(bool enable_progress_threads,
|
||||
bool enable_mpi_threads);
|
||||
|
@ -34,6 +34,8 @@
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/datatype/dt_arch.h"
|
||||
|
||||
static int ompi_osc_pt2pt_component_open(void);
|
||||
|
||||
ompi_osc_pt2pt_component_t mca_osc_pt2pt_component = {
|
||||
{ /* ompi_osc_base_component_t */
|
||||
{ /* ompi_base_component_t */
|
||||
@ -42,7 +44,7 @@ ompi_osc_pt2pt_component_t mca_osc_pt2pt_component = {
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
ompi_osc_pt2pt_component_open,
|
||||
NULL
|
||||
},
|
||||
{ /* mca_base_component_data */
|
||||
@ -111,6 +113,20 @@ want_locks(ompi_info_t *info)
|
||||
return !no_locks;
|
||||
}
|
||||
|
||||
/* Value returned by mca_base_param_reg_string() when the
   "fence_sync_method" parameter is registered in
   ompi_osc_pt2pt_component_open(); it is later handed to
   mca_base_param_lookup_string() to read the configured string. */
static int fence_sync_index;
|
||||
|
||||
/*
 * Open function listed in the mca_osc_pt2pt_component initializer.
 *
 * Registers the string parameter "fence_sync_method" (default value
 * "reduce_scatter"; the help text names reduce_scatter, allreduce and
 * alltoall as the choices) and saves the value returned by the
 * registration call in the file-scope fence_sync_index.
 *
 * Returns OMPI_SUCCESS unconditionally.
 */
static int
|
||||
ompi_osc_pt2pt_component_open(void)
|
||||
{
|
||||
fence_sync_index =
|
||||
mca_base_param_reg_string(&mca_osc_pt2pt_component.super.osc_version,
|
||||
"fence_sync_method",
|
||||
"How to synchronize fence: reduce_scatter, allreduce, alltoall",
|
||||
false, false, "reduce_scatter", NULL);
|
||||
|
||||
/* NOTE(review): the value stored in fence_sync_index is not inspected
   here, so a failed registration (if the call can fail) would go
   unreported -- confirm against the mca_base_param API. */
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ompi_osc_pt2pt_component_init(bool enable_progress_threads,
|
||||
@ -195,6 +211,8 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
|
||||
{
|
||||
ompi_osc_pt2pt_module_t *module;
|
||||
int ret, i;
|
||||
char *sync_string;
|
||||
|
||||
/* create module structure */
|
||||
module = malloc(sizeof(ompi_osc_pt2pt_module_t));
|
||||
if (NULL == module) return OMPI_ERROR;
|
||||
@ -273,6 +291,35 @@ ompi_osc_pt2pt_component_select(ompi_win_t *win,
|
||||
module->p2p_fence_coll_counts[i] = 1;
|
||||
}
|
||||
|
||||
module->p2p_fence_coll_results = malloc(sizeof(int) *
|
||||
ompi_comm_size(module->p2p_comm));
|
||||
if (NULL == module->p2p_fence_coll_counts) {
|
||||
free(module->p2p_fence_coll_counts);
|
||||
free(module->p2p_copy_num_pending_sendreqs);
|
||||
OBJ_DESTRUCT(&module->p2p_copy_pending_sendreqs);
|
||||
OBJ_DESTRUCT(&module->p2p_long_msgs);
|
||||
free(module->p2p_num_pending_sendreqs);
|
||||
OBJ_DESTRUCT(&module->p2p_pending_sendreqs);
|
||||
ompi_comm_free(&comm);
|
||||
OBJ_DESTRUCT(&(module->p2p_acc_lock));
|
||||
OBJ_DESTRUCT(&(module->p2p_lock));
|
||||
free(module);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* figure out what sync method to use */
|
||||
mca_base_param_lookup_string(fence_sync_index, &sync_string);
|
||||
if (0 == strcmp(sync_string, "reduce_scatter")) {
|
||||
module->p2p_fence_sync_type = OSC_SYNC_REDUCE_SCATTER;
|
||||
} else if (0 == strcmp(sync_string, "allreduce")) {
|
||||
module->p2p_fence_sync_type = OSC_SYNC_ALLREDUCE;
|
||||
} else if (0 == strcmp(sync_string, "alltoall")) {
|
||||
module->p2p_fence_sync_type = OSC_SYNC_ALLTOALL;
|
||||
} else {
|
||||
opal_output(0, "invalid value for fence_sync_method parameter: %s\n", sync_string);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* pwsc data */
|
||||
module->p2p_pw_group = NULL;
|
||||
module->p2p_sc_group = NULL;
|
||||
|
@ -112,16 +112,51 @@ ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
|
||||
|
||||
ompi_osc_pt2pt_flip_sendreqs(P2P_MODULE(win));
|
||||
|
||||
/* find out how much data everyone is going to send us. Need
|
||||
to have the lock during this period so that we have a sane
|
||||
view of the number of sendreqs */
|
||||
ret = P2P_MODULE(win)->p2p_comm->
|
||||
c_coll.coll_reduce_scatter(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
|
||||
&incoming_reqs,
|
||||
P2P_MODULE(win)->p2p_fence_coll_counts,
|
||||
MPI_SHORT,
|
||||
MPI_SUM,
|
||||
P2P_MODULE(win)->p2p_comm);
|
||||
switch (P2P_MODULE(win)->p2p_fence_sync_type) {
|
||||
|
||||
/* find out how much data everyone is going to send us. Need
|
||||
to have the lock during this period so that we have a sane
|
||||
view of the number of sendreqs */
|
||||
case OSC_SYNC_REDUCE_SCATTER:
|
||||
ret = P2P_MODULE(win)->p2p_comm->
|
||||
c_coll.coll_reduce_scatter(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
|
||||
&incoming_reqs,
|
||||
P2P_MODULE(win)->p2p_fence_coll_counts,
|
||||
MPI_SHORT,
|
||||
MPI_SUM,
|
||||
P2P_MODULE(win)->p2p_comm);
|
||||
break;
|
||||
|
||||
case OSC_SYNC_ALLREDUCE:
|
||||
ret = P2P_MODULE(win)->p2p_comm->
|
||||
c_coll.coll_allreduce(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
|
||||
P2P_MODULE(win)->p2p_fence_coll_results,
|
||||
ompi_comm_size(P2P_MODULE(win)->p2p_comm),
|
||||
MPI_SHORT,
|
||||
MPI_SUM,
|
||||
P2P_MODULE(win)->p2p_comm);
|
||||
incoming_reqs = P2P_MODULE(win)->
|
||||
p2p_fence_coll_results[P2P_MODULE(win)->p2p_comm->c_my_rank];
|
||||
break;
|
||||
|
||||
case OSC_SYNC_ALLTOALL:
|
||||
ret = P2P_MODULE(win)->p2p_comm->
|
||||
c_coll.coll_alltoall(P2P_MODULE(win)->p2p_copy_num_pending_sendreqs,
|
||||
1,
|
||||
MPI_SHORT,
|
||||
P2P_MODULE(win)->p2p_fence_coll_results,
|
||||
1,
|
||||
MPI_SHORT,
|
||||
P2P_MODULE(win)->p2p_comm);
|
||||
incoming_reqs = 0;
|
||||
for (i = 0 ; i < ompi_comm_size(P2P_MODULE(win)->p2p_comm) ; ++i) {
|
||||
incoming_reqs += P2P_MODULE(win)->p2p_fence_coll_results[i];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(0 == 1);
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
/* put the stupid data back for the user. This is not
|
||||
cheap, but the user lost his data if we don't. */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user