1
1

coll/base: add knomial tree algorithm for MPI_Bcast

Signed-off-by: Mikhail Kurnosov <mkurnosov@gmail.com>
Этот коммит содержится в:
Mikhail Kurnosov 2018-06-06 10:34:04 +07:00 коммит произвёл Nathan Hjelm
родитель a64abadf97
Коммит 6547b58316
7 изменённых файлов: 176 добавлений и 8 удалений

Просмотреть файл

@ -662,5 +662,54 @@ ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
return err;
}
/* copied function (with appropriate renaming) ends here */
/*
 * ompi_coll_base_bcast_intra_knomial
 *
 * Function:    Bcast using k-nomial tree algorithm
 * Accepts:     Same arguments as MPI_Bcast, plus:
 *              segsize -- segment size handed to COLL_BASE_COMPUTED_SEGCOUNT
 *                         together with the datatype size to derive the
 *                         number of elements sent per operation
 *              radix   -- k-nomial tree radix (>= 2)
 * Returns:     MPI_SUCCESS or error code
 *
 * If the k-nomial tree cannot be built for the requested radix, the call
 * is forwarded to ompi_coll_base_bcast_intra_binomial with the same
 * buffer, count, datatype, root, communicator, module and segsize.
 *
 * Time complexity: (radix - 1)O(\log_{radix}(comm_size))
 *
 * Example, comm_size=10
 *      radix=2            radix=3               radix=4
 *         0                  0                     0
 *      / / \ \          / /  |  \ \           /   / \ \ \
 *     8 4   2 1        9 3   6   1 2         4   8   1 2 3
 *     | |\  |            |\  |\             /|\  |
 *     9 6 5 3            4 5 7 8           5 6 7 9
 *       |
 *       7
 */
int ompi_coll_base_bcast_intra_knomial(
    void *buf, int count, struct ompi_datatype_t *datatype, int root,
    struct ompi_communicator_t *comm, mca_coll_base_module_t *module,
    uint32_t segsize, int radix)
{
    int segcount = count;
    size_t typesize;
    mca_coll_base_comm_t *data = module->base_data;

    /* (Re)build the cached k-nomial tree if root or radix changed. */
    COLL_BASE_UPDATE_KMTREE(comm, module, root, radix);
    if (NULL == data->cached_kmtree) {
        /* Failed to build k-nomial tree for given radix: fall back to the
         * binomial algorithm. Its last argument is a segment size, not an
         * element count, so forward segsize unchanged. */
        return ompi_coll_base_bcast_intra_binomial(buf, count, datatype, root, comm, module,
                                                   segsize);
    }

    /**
     * Determine number of elements sent per operation.
     */
    ompi_datatype_type_size(datatype, &typesize);
    COLL_BASE_COMPUTED_SEGCOUNT(segsize, typesize, segcount);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:base:bcast_intra_knomial rank %d segsize %5d typesize %lu segcount %d",
                 ompi_comm_rank(comm), segsize, (unsigned long)typesize, segcount));

    return ompi_coll_base_bcast_intra_generic(buf, count, datatype, root, comm, module,
                                              segcount, data->cached_kmtree);
}

Просмотреть файл

@ -96,6 +96,9 @@ coll_base_comm_destruct(mca_coll_base_comm_t *data)
if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bmtree);
}
if (data->cached_kmtree) { /* destroy kmtree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_kmtree);
}
if (data->cached_chain) { /* destroy general chain if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_chain);
}

Просмотреть файл

@ -221,6 +221,7 @@ int ompi_coll_base_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_knomial(BCAST_ARGS, uint32_t segsize, int radix);
/* Exscan */
int ompi_coll_base_exscan_intra_recursivedoubling(EXSCAN_ARGS);
@ -313,6 +314,22 @@ do {
} \
} while (0)
/*
 * Refresh the k-nomial tree cached in (BASE_MODULE)->base_data.
 * The cached tree is kept only when it exists and was built for the same
 * ROOT and RADIX; otherwise any previous tree is destroyed and a new one is
 * built with ompi_coll_base_topo_build_kmtree. The root/radix keys are
 * recorded even when the build returns NULL, so callers must check
 * cached_kmtree afterwards. ROOT and RADIX may be evaluated more than once.
 */
#define COLL_BASE_UPDATE_KMTREE(OMPI_COMM, BASE_MODULE, ROOT, RADIX)                  \
do {                                                                                  \
    mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data;                       \
    if ((NULL == coll_comm->cached_kmtree)                                            \
        || (coll_comm->cached_kmtree_root != (ROOT))                                  \
        || (coll_comm->cached_kmtree_radix != (RADIX))) {                             \
        /* stale or missing: drop the old tree (if any) before rebuilding */          \
        if (NULL != coll_comm->cached_kmtree) {                                       \
            ompi_coll_base_topo_destroy_tree(&(coll_comm->cached_kmtree));            \
        }                                                                             \
        coll_comm->cached_kmtree =                                                    \
            ompi_coll_base_topo_build_kmtree((OMPI_COMM), (ROOT), (RADIX));           \
        coll_comm->cached_kmtree_root = (ROOT);                                       \
        coll_comm->cached_kmtree_radix = (RADIX);                                     \
    }                                                                                 \
} while (0)
#define COLL_BASE_UPDATE_PIPELINE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
@ -431,6 +448,11 @@ struct mca_coll_base_comm_t {
ompi_coll_tree_t *cached_in_order_bmtree;
int cached_in_order_bmtree_root;
/* k-nomial tree */
ompi_coll_tree_t *cached_kmtree;
int cached_kmtree_root;
int cached_kmtree_radix;
/* chained tree (fanout followed by pipelines) */
ompi_coll_tree_t *cached_chain;
int cached_chain_root;

Просмотреть файл

@ -102,7 +102,7 @@ ompi_coll_base_topo_build_tree( int fanout,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
tree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
if (!tree) {
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree PANIC::out of memory"));
return NULL;
@ -200,7 +200,7 @@ ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
tree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
if (!tree) {
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:topo_build_tree PANIC::out of memory"));
@ -339,7 +339,7 @@ ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm,
index = rank -root;
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
bmtree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
if (!bmtree) {
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
return NULL;
@ -416,7 +416,7 @@ ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
vrank = (rank - root + size) % size;
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
bmtree = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
if (!bmtree) {
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
return NULL;
@ -457,6 +457,75 @@ ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
return bmtree;
}
/*
 * ompi_coll_base_topo_build_kmtree: Build k-nomial tree for Bcast
 *
 * Parameters:
 *   comm  -- communicator whose size and calling rank define the tree
 *   root  -- rank placed at the top of the tree
 *   radix -- k-nomial tree radix, must be >= 2
 *
 * Returns: a malloc'ed tree describing the calling rank's parent
 *          (tree_prev, MPI_PROC_NULL for the root) and its children
 *          (tree_next[0 .. tree_nextsize-1]), or NULL when radix < 2 or
 *          the allocation fails.
 *
 * Ranks are rotated so that root becomes virtual rank 0; parents and
 * children are computed on virtual ranks and rotated back.
 *
 * Example, comm_size=10
 *      radix=2            radix=3               radix=4
 *         0                  0                     0
 *      / / \ \          / /  |  \ \           /   / \ \ \
 *     8 4   2 1        9 3   6   1 2         4   8   1 2 3
 *     | |\  |            |\  |\             /|\  |
 *     9 6 5 3            4 5 7 8           5 6 7 9
 *       |
 *       7
 */
ompi_coll_tree_t*
ompi_coll_base_topo_build_kmtree(struct ompi_communicator_t* comm,
                                 int root, int radix)
{
    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:base:topo:build_kmtree root %d, radix %d", root, radix));

    /* A radix below 2 never advances the power-of-radix loops below
     * (infinite loop); reject it so the caller can fall back. */
    if (radix < 2) {
        OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                     "coll:base:topo:build_kmtree invalid radix %d (must be >= 2)", radix));
        return NULL;
    }

    int comm_size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);

    /* nchilds <= (radix - 1) * \ceil(\log_{radix}(comm_size)).
     * Powers of radix are tracked in 64 bits so they cannot overflow int. */
    int log_radix = 0;
    for (long long span = 1; span < comm_size; span *= radix) {
        log_radix++;
    }

    /* A node can never have more children than there are other ranks, and
     * the allocation must cover at least the one-element tree_next array
     * that is part of the struct itself. */
    long long nchilds_max = (long long)(radix - 1) * log_radix;
    if (nchilds_max > comm_size - 1) {
        nchilds_max = comm_size - 1;
    }
    if (nchilds_max < 1) {
        nchilds_max = 1;
    }

    int vrank = (rank - root + comm_size) % comm_size;

    ompi_coll_tree_t *kmtree = malloc(COLL_TREE_SIZE((int)nchilds_max));
    if (NULL == kmtree) {
        OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                     "coll:base:topo:build_kmtree PANIC out of memory"));
        return NULL;
    }

    /* NOTE(review): tree_fanout is not assigned here (same as before this
     * change); confirm that no consumer of k-nomial trees reads it. */
    kmtree->tree_bmtree = 0;
    kmtree->tree_root = root;
    kmtree->tree_prev = MPI_PROC_NULL;
    kmtree->tree_nextsize = 0;

    /* Setup parent: find the lowest level at which vrank is not a multiple
     * of radix * mask; the parent is vrank rounded down to that multiple.
     * The root (vrank 0) never matches and leaves with mask >= comm_size. */
    long long mask = 1;
    while (mask < comm_size) {
        long long stride = (long long)radix * mask;
        if (vrank % stride) {
            long long parent = vrank / stride * stride;
            kmtree->tree_prev = (int32_t)((parent + root) % comm_size);
            break;
        }
        mask = stride;
    }

    /* Setup childs: walk the levels below the one found above, highest
     * level first, so larger subtrees come first in tree_next. */
    mask /= radix;
    int nchilds = 0;
    while (mask > 0) {
        for (int r = 1; r < radix; r++) {
            long long child = vrank + mask * r;
            if (child >= comm_size) {
                /* children grow with r, so the rest are out of range too */
                break;
            }
            kmtree->tree_next[nchilds] = (int32_t)((child + root) % comm_size);
            nchilds++;
        }
        mask /= radix;
    }
    kmtree->tree_nextsize = nchilds;
    return kmtree;
}
ompi_coll_tree_t*
ompi_coll_base_topo_build_chain( int fanout,
@ -486,7 +555,7 @@ ompi_coll_base_topo_build_chain( int fanout,
/*
* Allocate space for topology arrays if needed
*/
chain = (ompi_coll_tree_t*)malloc( sizeof(ompi_coll_tree_t) );
chain = (ompi_coll_tree_t*)malloc(COLL_TREE_SIZE(MAXTREEFANOUT));
if (!chain) {
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain PANIC out of memory"));
fflush(stdout);

Просмотреть файл

@ -23,6 +23,10 @@
/* Child-array capacity requested by builders that do not size the tree
 * themselves. */
#define MAXTREEFANOUT 32

/* Size in bytes of `member` within `type`, without needing an object. */
#define MEMBSIZE(type, member) (sizeof ((type *)0)->member)

/* Bytes needed for an ompi_coll_tree_t whose tree_next array holds `fanout`
 * entries; the struct already contains one entry, hence the "- 1". */
#define COLL_TREE_SIZE(fanout)                                              \
    (sizeof(ompi_coll_tree_t)                                               \
     + ((fanout) - 1) * MEMBSIZE(ompi_coll_tree_t, tree_next[0]))
BEGIN_C_DECLS
typedef struct ompi_coll_tree_t {
@ -30,8 +34,8 @@ typedef struct ompi_coll_tree_t {
int32_t tree_fanout;
int32_t tree_bmtree;
int32_t tree_prev;
int32_t tree_next[MAXTREEFANOUT];
int32_t tree_nextsize;
int32_t tree_next[1];
} ompi_coll_tree_t;
ompi_coll_tree_t*
@ -47,6 +51,11 @@ ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm,
ompi_coll_tree_t*
ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_base_topo_build_kmtree(struct ompi_communicator_t* comm,
int root, int radix);
ompi_coll_tree_t*
ompi_coll_base_topo_build_chain( int fanout,
struct ompi_communicator_t* com,

Просмотреть файл

@ -30,6 +30,8 @@ static int coll_tuned_bcast_forced_algorithm = 0;
static int coll_tuned_bcast_segment_size = 0;
static int coll_tuned_bcast_tree_fanout;
static int coll_tuned_bcast_chain_fanout;
/* k-nomial tree radix for the bcast algorithm (>= 2) */
static int coll_tuned_bcast_knomial_radix = 4;
/* valid values for coll_tuned_bcast_forced_algorithm */
static mca_base_var_enum_value_t bcast_algorithms[] = {
@ -40,6 +42,7 @@ static mca_base_var_enum_value_t bcast_algorithms[] = {
{4, "split_binary_tree"},
{5, "binary_tree"},
{6, "binomial"},
{7, "knomial"},
{0, NULL}
};
@ -75,7 +78,7 @@ int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mc
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: binomial tree.",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: binomial tree, 7: knomial tree.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_ALL,
@ -115,6 +118,14 @@ int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mc
MCA_BASE_VAR_SCOPE_ALL,
&coll_tuned_bcast_chain_fanout);
coll_tuned_bcast_knomial_radix = 4;
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_knomial_radix",
"k-nomial tree radix for the bcast algorithm (radix > 1).",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL,
&coll_tuned_bcast_knomial_radix);
return (MPI_SUCCESS);
}
@ -143,6 +154,9 @@ int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
return ompi_coll_base_bcast_intra_bintree( buf, count, dtype, root, comm, module, segsize );
case (6):
return ompi_coll_base_bcast_intra_binomial( buf, count, dtype, root, comm, module, segsize );
case (7):
return ompi_coll_base_bcast_intra_knomial(buf, count, dtype, root, comm, module,
segsize, coll_tuned_bcast_knomial_radix);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));

Просмотреть файл

@ -259,6 +259,8 @@ tuned_module_enable( mca_coll_base_module_t *module,
data->cached_bmtree = NULL;
/* binomial tree */
data->cached_in_order_bmtree = NULL;
/* k-nomial tree */
data->cached_kmtree = NULL;
/* chains (fanout followed by pipelines) */
data->cached_chain = NULL;
/* standard pipeline */