coll/ml: improve the buffer size calculation and ensure the bcol_index in
a hierarchy actually matches a bcol that is in use. There was a bug in one of the paths to calculate the ml buffer size. I fixed the bug and squashed all the paths together to avoid further issues (the result was correct in another path that calculated the same value). Additionally, the i_hier was being used as the bcol_index. This is not correct in a couple of cases so I added a variable to keep track of the real bcol_index. cmr=v1.8:reviewer=pasha This commit was SVN r31189.
Этот коммит содержится в:
родитель
f1dd589092
Коммит
c7d830f4b9
@ -40,6 +40,7 @@
|
|||||||
#include "opal/datatype/opal_datatype.h"
|
#include "opal/datatype/opal_datatype.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/util/arch.h"
|
#include "opal/util/arch.h"
|
||||||
|
#include "opal/align.h"
|
||||||
|
|
||||||
#include "coll_ml.h"
|
#include "coll_ml.h"
|
||||||
#include "coll_ml_inlines.h"
|
#include "coll_ml_inlines.h"
|
||||||
@ -449,7 +450,7 @@ static int calculate_buffer_header_size(mca_coll_ml_module_t *ml_module)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
offset = ((offset + BCOL_HEAD_ALIGN - 1) / BCOL_HEAD_ALIGN) * BCOL_HEAD_ALIGN;
|
offset = OPAL_ALIGN(offset, BCOL_HEAD_ALIGN, uint32_t);
|
||||||
/* select largest offset between multiple topologies */
|
/* select largest offset between multiple topologies */
|
||||||
if (data_offset < (int) offset) {
|
if (data_offset < (int) offset) {
|
||||||
data_offset = (int) offset;
|
data_offset = (int) offset;
|
||||||
@ -745,7 +746,6 @@ static void ml_init_k_nomial_trees(mca_coll_ml_topology_t *topo, int *list_of_ra
|
|||||||
/* now cache this on the bcol module */
|
/* now cache this on the bcol module */
|
||||||
pair->bcol_modules[0]->list_n_connected = list_n_connected;
|
pair->bcol_modules[0]->list_n_connected = list_n_connected;
|
||||||
|
|
||||||
|
|
||||||
/* I should do one more round here and figure out my offset at this level
|
/* I should do one more round here and figure out my offset at this level
|
||||||
* the calculation is simple: Am I a local leader in this level? If so, then I keep the offset
|
* the calculation is simple: Am I a local leader in this level? If so, then I keep the offset
|
||||||
* from the previous level. Else, I find out how "far away" the local leader is from me and set
|
* from the previous level. Else, I find out how "far away" the local leader is from me and set
|
||||||
@ -1322,6 +1322,7 @@ static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module,
|
|||||||
int64_t frag_size;
|
int64_t frag_size;
|
||||||
const mca_bcol_base_component_2_0_0_t *bcol_component = NULL;
|
const mca_bcol_base_component_2_0_0_t *bcol_component = NULL;
|
||||||
mca_base_component_list_item_t *bcol_cli = NULL;
|
mca_base_component_list_item_t *bcol_cli = NULL;
|
||||||
|
int bcol_index;
|
||||||
|
|
||||||
/* If this assert fails, it means that you changed initialization
|
/* If this assert fails, it means that you changed initialization
|
||||||
* order and the date offset , that is critical for this section of code,
|
* order and the date offset , that is critical for this section of code,
|
||||||
@ -1332,9 +1333,9 @@ static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module,
|
|||||||
|
|
||||||
/* need to figure out which bcol's are participating
|
/* need to figure out which bcol's are participating
|
||||||
* in the hierarchy across the communicator, so that we can set
|
* in the hierarchy across the communicator, so that we can set
|
||||||
* appropriate segmantation parameters.
|
* appropriate segmentation parameters.
|
||||||
*/
|
*/
|
||||||
bcols_in_use = (int *) malloc(sizeof(int) * 2 * n_hierarchies);
|
bcols_in_use = (int *) calloc(2 * n_hierarchies, sizeof(int));
|
||||||
if (OPAL_UNLIKELY(NULL == bcols_in_use)) {
|
if (OPAL_UNLIKELY(NULL == bcols_in_use)) {
|
||||||
ML_VERBOSE(10, ("Cannot allocate memory for bcols_in_use."));
|
ML_VERBOSE(10, ("Cannot allocate memory for bcols_in_use."));
|
||||||
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
@ -1348,10 +1349,6 @@ static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module,
|
|||||||
bcols_in_use_all_ranks = bcols_in_use+n_hierarchies;
|
bcols_in_use_all_ranks = bcols_in_use+n_hierarchies;
|
||||||
|
|
||||||
/* get list of bcols that I am using */
|
/* get list of bcols that I am using */
|
||||||
for(i = 0; i < n_hierarchies; i++) {
|
|
||||||
bcols_in_use[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (j = 0; j < COLL_ML_TOPO_MAX; j++) {
|
for (j = 0; j < COLL_ML_TOPO_MAX; j++) {
|
||||||
mca_coll_ml_topology_t *topo_info = &ml_module->topo_list[j];
|
mca_coll_ml_topology_t *topo_info = &ml_module->topo_list[j];
|
||||||
if (COLL_ML_TOPO_DISABLED == topo_info->status) {
|
if (COLL_ML_TOPO_DISABLED == topo_info->status) {
|
||||||
@ -1391,116 +1388,63 @@ static int mca_coll_ml_read_allbcols_settings(mca_coll_ml_module_t *ml_module,
|
|||||||
/*
|
/*
|
||||||
* figure out fragmenation parameters
|
* figure out fragmenation parameters
|
||||||
*/
|
*/
|
||||||
/* can user buffers be used */
|
|
||||||
use_user_bufs = true;
|
|
||||||
bcol_cli = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
|
|
||||||
|
|
||||||
for (i = 0; i < n_hierarchies; i++) {
|
|
||||||
if(!bcols_in_use_all_ranks) {
|
|
||||||
/* this bcol is not being used - do nothing */
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component;
|
|
||||||
/* check to see if user buffers can be used */
|
|
||||||
if(!bcol_component->can_use_user_buffers) {
|
|
||||||
/* need to use library buffers, so all will do this */
|
|
||||||
use_user_bufs = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* size of ml buffer */
|
/* size of ml buffer */
|
||||||
length_ml_payload = mca_coll_ml_component.payload_buffer_size - ml_module->data_offset;
|
length_ml_payload = mca_coll_ml_component.payload_buffer_size - ml_module->data_offset;
|
||||||
|
|
||||||
if (use_user_bufs) {
|
|
||||||
/*
|
|
||||||
* using user buffers
|
|
||||||
*/
|
|
||||||
ml_module->use_user_buffers = 1;
|
|
||||||
|
|
||||||
/* figure out if data will be segmented for pipelining -
|
/* figure out if data will be segmented for pipelining -
|
||||||
* for non-contigous data will just use a fragment the size
|
* for non-contigous data will just use a fragment the size
|
||||||
* of the ml payload buffer */
|
* of the ml payload buffer */
|
||||||
|
|
||||||
/* check to see if any bcols impose a limit */
|
/* check to see if any bcols impose a limit */
|
||||||
limit_size_user_bufs = false;
|
limit_size_user_bufs = false;
|
||||||
bcol_cli = (mca_base_component_list_item_t *)opal_list_get_first(&mca_bcol_base_components_in_use);
|
use_user_bufs = true;
|
||||||
for (i = 0; i < n_hierarchies; i++) {
|
frag_size = length_ml_payload;
|
||||||
bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component;
|
bcol_index = 0;
|
||||||
if(bcol_component->max_frag_size != FRAG_SIZE_NO_LIMIT ){
|
|
||||||
limit_size_user_bufs = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (limit_size_user_bufs) {
|
OPAL_LIST_FOREACH(bcol_cli, &mca_bcol_base_components_in_use, mca_base_component_list_item_t) {
|
||||||
/* figure out fragement size */
|
|
||||||
frag_size = 0;
|
|
||||||
bcol_cli = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
|
|
||||||
for (i = 0; i < n_hierarchies; i++) {
|
|
||||||
/* check to see if this bcol is being used */
|
/* check to see if this bcol is being used */
|
||||||
if(!bcols_in_use_all_ranks[i]) {
|
if(!bcols_in_use_all_ranks[bcol_index]) {
|
||||||
/* not in use */
|
/* not in use */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component;
|
bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component;
|
||||||
if (FRAG_SIZE_NO_LIMIT == bcol_component->max_frag_size) {
|
|
||||||
/* no limit - will not determine fragement size */
|
/* check to see if user buffers can be used */
|
||||||
continue;
|
if (!bcol_component->can_use_user_buffers) {
|
||||||
|
/* need to use library buffers, so all will do this */
|
||||||
|
use_user_bufs = false;
|
||||||
}
|
}
|
||||||
if (0 != bcol_component->max_frag_size) {
|
|
||||||
|
/* figure out fragement size */
|
||||||
|
if (bcol_component->max_frag_size != FRAG_SIZE_NO_LIMIT ){
|
||||||
|
/* user buffers need to be limited in size */
|
||||||
|
limit_size_user_bufs = true;
|
||||||
|
|
||||||
|
if (0 == frag_size) {
|
||||||
/* nothing set yet */
|
/* nothing set yet */
|
||||||
frag_size = bcol_component->max_frag_size;
|
frag_size = bcol_component->max_frag_size;
|
||||||
} else {
|
} else if (frag_size < bcol_component->max_frag_size) {
|
||||||
if(frag_size < bcol_component->max_frag_size) {
|
|
||||||
/* stricter constraint on fragment size */
|
/* stricter constraint on fragment size */
|
||||||
frag_size = bcol_component->max_frag_size;
|
frag_size = bcol_component->max_frag_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bcol_cli = (mca_base_component_list_item_t *)opal_list_get_next((opal_list_item_t *)bcol_cli);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!use_user_bufs || limit_size_user_bufs) {
|
||||||
|
/* we need to limit the user buffer size or use library buffers */
|
||||||
ml_module->fragment_size = frag_size;
|
ml_module->fragment_size = frag_size;
|
||||||
} else {
|
} else {
|
||||||
/* entire message may be processed in single chunk */
|
/* entire message may be processed in single chunk */
|
||||||
ml_module->fragment_size = FRAG_SIZE_NO_LIMIT;
|
ml_module->fragment_size = FRAG_SIZE_NO_LIMIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* for non-contigous data - just use the ML buffers */
|
/* for non-contigous data - just use the ML buffers */
|
||||||
ml_module->ml_fragment_size = length_ml_payload;
|
ml_module->ml_fragment_size = length_ml_payload;
|
||||||
|
|
||||||
} else {
|
/* set whether we can use user buffers */
|
||||||
/*
|
ml_module->use_user_buffers = use_user_bufs;
|
||||||
* using library buffers
|
|
||||||
*/
|
|
||||||
ml_module->use_user_buffers = 0;
|
|
||||||
|
|
||||||
/* figure out buffer size */
|
|
||||||
ml_module->fragment_size = length_ml_payload;
|
|
||||||
/* see if this is too large */
|
|
||||||
bcol_cli = (mca_base_component_list_item_t *) opal_list_get_first(&mca_bcol_base_components_in_use);
|
|
||||||
for (i = 0; i < n_hierarchies; i++) {
|
|
||||||
/* check to see if this bcol is being used */
|
|
||||||
if(!bcols_in_use_all_ranks[i]) {
|
|
||||||
/* not in use */
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
bcol_component = (mca_bcol_base_component_2_0_0_t *) bcol_cli->cli_component;
|
|
||||||
bcol_cli = (mca_base_component_list_item_t *) opal_list_get_next((opal_list_item_t *) bcol_cli);
|
|
||||||
if (FRAG_SIZE_NO_LIMIT == bcol_component->max_frag_size) {
|
|
||||||
/* no limit - will not affect fragement size */
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (bcol_component->max_frag_size < (int)ml_module->fragment_size)
|
|
||||||
{
|
|
||||||
/* frag size set too large */
|
|
||||||
ml_module->fragment_size = bcol_component->max_frag_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* for non-contigous data - just use the ML buffers */
|
|
||||||
ml_module->ml_fragment_size = ml_module->fragment_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
ML_VERBOSE(10, ("Seting payload size to %d %d [%d %d]",
|
ML_VERBOSE(10, ("Seting payload size to %d %d [%d %d]",
|
||||||
ml_module->ml_fragment_size, length_ml_payload,
|
ml_module->ml_fragment_size, length_ml_payload,
|
||||||
@ -1638,7 +1582,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
|
|||||||
const mca_sbgp_base_component_2_0_0_t *sbgp_component = NULL;
|
const mca_sbgp_base_component_2_0_0_t *sbgp_component = NULL;
|
||||||
|
|
||||||
|
|
||||||
int i_hier = 0, n_hier = 0, ll_p1,
|
int i_hier = 0, n_hier = 0, ll_p1, bcol_index = 0,
|
||||||
n_procs_in = 0, group_index = 0, n_remain = 0,
|
n_procs_in = 0, group_index = 0, n_remain = 0,
|
||||||
i, j, ret = OMPI_SUCCESS, my_rank_in_list = 0,
|
i, j, ret = OMPI_SUCCESS, my_rank_in_list = 0,
|
||||||
n_procs_selected = 0, original_group_size = 0, i_am_done = 0,
|
n_procs_selected = 0, original_group_size = 0, i_am_done = 0,
|
||||||
@ -1992,7 +1936,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
|
|||||||
pair->bcol_component = (mca_bcol_base_component_t *)
|
pair->bcol_component = (mca_bcol_base_component_t *)
|
||||||
((mca_base_component_list_item_t *) bcol_cli)->cli_component;
|
((mca_base_component_list_item_t *) bcol_cli)->cli_component;
|
||||||
|
|
||||||
pair->bcol_index = i_hier;
|
pair->bcol_index = bcol_index;
|
||||||
|
|
||||||
/* create bcol modules */
|
/* create bcol modules */
|
||||||
ML_VERBOSE(10, ("Create bcol modules."));
|
ML_VERBOSE(10, ("Create bcol modules."));
|
||||||
@ -2026,7 +1970,7 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
|
|||||||
module->group_size;
|
module->group_size;
|
||||||
|
|
||||||
/* set the bcol id */
|
/* set the bcol id */
|
||||||
pair->bcol_modules[i]->bcol_id = (int16_t) i_hier;
|
pair->bcol_modules[i]->bcol_id = (int16_t) bcol_index;
|
||||||
|
|
||||||
/* Set bcol mode bits */
|
/* Set bcol mode bits */
|
||||||
topo->all_bcols_mode &= (( mca_bcol_base_module_t *) pair->bcol_modules[i])->supported_mode;
|
topo->all_bcols_mode &= (( mca_bcol_base_module_t *) pair->bcol_modules[i])->supported_mode;
|
||||||
@ -2041,10 +1985,10 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
|
|||||||
n_hier++;
|
n_hier++;
|
||||||
|
|
||||||
if (-1 == my_lowest_group_index) {
|
if (-1 == my_lowest_group_index) {
|
||||||
my_lowest_group_index = i_hier;
|
my_lowest_group_index = bcol_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
my_highest_group_index = i_hier;
|
my_highest_group_index = bcol_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if n_remain is 1, and the communicator size is not 1, and module
|
/* if n_remain is 1, and the communicator size is not 1, and module
|
||||||
@ -2077,6 +2021,8 @@ static int mca_coll_ml_tree_hierarchy_discovery(mca_coll_ml_module_t *ml_module,
|
|||||||
i_hier++;
|
i_hier++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
++bcol_index;
|
||||||
|
|
||||||
n_procs_in = n_remain;
|
n_procs_in = n_remain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user