1
1

coll/libnbc: add knomial tree algorithm for MPI_Ibcast

Signed-off-by: Mikhail Kurnosov <mkurnosov@gmail.com>
(cherry picked from commit b0429d25dfb1ed3f9adfa45169478cada0ba2675)
Signed-off-by: Brian Barrett <bbarrett@amazon.com>
Этот коммит содержится в:
Mikhail Kurnosov 2018-09-25 21:14:09 +07:00 коммит произвёл Brian Barrett
родитель a318f117f6
Коммит 6971dab943
3 изменённых файлов: 111 добавлений и 13 удалений

Просмотреть файл

@ -70,6 +70,8 @@ BEGIN_C_DECLS
#define NBC_NUM_COLL 17
extern bool libnbc_ibcast_skip_dt_decision;
extern int libnbc_ibcast_algorithm;
extern int libnbc_ibcast_knomial_radix;
extern int libnbc_iexscan_algorithm;
extern int libnbc_ireduce_algorithm;
extern int libnbc_iscan_algorithm;

Просмотреть файл

@ -46,6 +46,17 @@ static int libnbc_priority = 10;
static bool libnbc_in_progress = false; /* protect from recursive calls */
bool libnbc_ibcast_skip_dt_decision = true;
int libnbc_ibcast_algorithm = 0; /* ibcast user forced algorithm */
int libnbc_ibcast_knomial_radix = 4;
static mca_base_var_enum_value_t ibcast_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "binomial"},
{3, "chain"},
{4, "knomial"},
{0, NULL}
};
int libnbc_iexscan_algorithm = 0; /* iexscan user forced algorithm */
static mca_base_var_enum_value_t iexscan_algorithms[] = {
{0, "ignore"},
@ -184,6 +195,24 @@ libnbc_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&libnbc_ibcast_skip_dt_decision);
libnbc_ibcast_algorithm = 0;
(void) mca_base_var_enum_create("coll_libnbc_ibcast_algorithms", ibcast_algorithms, &new_enum);
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
"ibcast_algorithm",
"Which ibcast algorithm is used: 0 ignore, 1 linear, 2 binomial, 3 chain, 4 knomial",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
&libnbc_ibcast_algorithm);
OBJ_RELEASE(new_enum);
libnbc_ibcast_knomial_radix = 4;
(void) mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,
"ibcast_knomial_radix", "k-nomial tree radix for the ibcast algorithm (radix > 1)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&libnbc_ibcast_knomial_radix);
libnbc_iexscan_algorithm = 0;
(void) mca_base_var_enum_create("coll_libnbc_iexscan_algorithms", iexscan_algorithms, &new_enum);
mca_base_component_var_register(&mca_coll_libnbc_component.super.collm_version,

Просмотреть файл

@ -26,6 +26,8 @@ static inline int bcast_sched_linear(int rank, int p, int root, NBC_Schedule *sc
MPI_Datatype datatype);
static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *schedule, void *buffer, int count,
MPI_Datatype datatype, int fragsize, size_t size);
static inline int bcast_sched_knomial(int rank, int comm_size, int root, NBC_Schedule *schedule, void *buf,
int count, MPI_Datatype datatype, int knomial_radix);
#ifdef NBC_CACHE_SCHEDULE
/* tree comparison function for schedule cache */
@ -55,7 +57,7 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
#ifdef NBC_CACHE_SCHEDULE
NBC_Bcast_args *args, *found, search;
#endif
enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN } alg;
enum { NBC_BCAST_LINEAR, NBC_BCAST_BINOMIAL, NBC_BCAST_CHAIN, NBC_BCAST_KNOMIAL } alg;
ompi_coll_libnbc_module_t *libnbc_module = (ompi_coll_libnbc_module_t*) module;
rank = ompi_comm_rank (comm);
@ -73,25 +75,40 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
segsize = 16384;
/* algorithm selection */
if( libnbc_ibcast_skip_dt_decision ) {
if (p <= 4) {
alg = NBC_BCAST_LINEAR;
if (libnbc_ibcast_algorithm == 0) {
if( libnbc_ibcast_skip_dt_decision ) {
if (p <= 4) {
alg = NBC_BCAST_LINEAR;
}
else {
alg = NBC_BCAST_BINOMIAL;
}
}
else {
alg = NBC_BCAST_BINOMIAL;
if (p <= 4) {
alg = NBC_BCAST_LINEAR;
} else if (size * count < 65536) {
alg = NBC_BCAST_BINOMIAL;
} else if (size * count < 524288) {
alg = NBC_BCAST_CHAIN;
segsize = 8192;
} else {
alg = NBC_BCAST_CHAIN;
segsize = 32768;
}
}
}
else {
if (p <= 4) {
} else {
/* user forced dynamic decision */
if (libnbc_ibcast_algorithm == 1) {
alg = NBC_BCAST_LINEAR;
} else if (size * count < 65536) {
} else if (libnbc_ibcast_algorithm == 2) {
alg = NBC_BCAST_BINOMIAL;
} else if (size * count < 524288) {
} else if (libnbc_ibcast_algorithm == 3) {
alg = NBC_BCAST_CHAIN;
segsize = 8192;
} else if (libnbc_ibcast_algorithm == 4 && libnbc_ibcast_knomial_radix > 1) {
alg = NBC_BCAST_KNOMIAL;
} else {
alg = NBC_BCAST_CHAIN;
segsize = 32768;
alg = NBC_BCAST_LINEAR;
}
}
@ -119,6 +136,9 @@ static int nbc_bcast_init(void *buffer, int count, MPI_Datatype datatype, int ro
case NBC_BCAST_CHAIN:
res = bcast_sched_chain(rank, p, root, schedule, buffer, count, datatype, segsize, size);
break;
case NBC_BCAST_KNOMIAL:
res = bcast_sched_knomial(rank, p, root, schedule, buffer, count, datatype, libnbc_ibcast_knomial_radix);
break;
}
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) {
@ -342,6 +362,53 @@ static inline int bcast_sched_chain(int rank, int p, int root, NBC_Schedule *sch
return OMPI_SUCCESS;
}
/*
* bcast_sched_knomial:
*
* Description: an implementation of Ibcast using k-nomial tree algorithm
*
* Time: (radix - 1)O(log_{radix}(comm_size))
* Memory: O(reqs_max)
* Schedule length (rounds): O(log(comm_size))
*/
static inline int bcast_sched_knomial(
int rank, int comm_size, int root, NBC_Schedule *schedule, void *buf,
int count, MPI_Datatype datatype, int knomial_radix)
{
int res = OMPI_SUCCESS;
/* Receive from parent */
int vrank = (rank - root + comm_size) % comm_size;
int mask = 0x1;
while (mask < comm_size) {
if (vrank % (knomial_radix * mask)) {
int parent = vrank / (knomial_radix * mask) * (knomial_radix * mask);
parent = (parent + root) % comm_size;
res = NBC_Sched_recv(buf, false, count, datatype, parent, schedule, true);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
break;
}
mask *= knomial_radix;
}
mask /= knomial_radix;
/* Send data to all children */
while (mask > 0) {
for (int r = 1; r < knomial_radix; r++) {
int child = vrank + mask * r;
if (child < comm_size) {
child = (child + root) % comm_size;
res = NBC_Sched_send(buf, false, count, datatype, child, schedule, false);
if (OPAL_UNLIKELY(OMPI_SUCCESS != res)) { goto cleanup_and_return; }
}
}
mask /= knomial_radix;
}
cleanup_and_return:
return res;
}
static int nbc_bcast_inter_init(void *buffer, int count, MPI_Datatype datatype, int root,
struct ompi_communicator_t *comm, ompi_request_t ** request,
struct mca_coll_base_module_2_3_0_t *module, bool persistent) {