1
1

COLL TUNED: Use per-rank data size instead of total size for decision

The total size depends on the number of ranks, so the usual size ranges don't work.
Thus, use the average size across all ranks to make the decision.

Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
(cherry picked from commit f670364d764bf7409e03860bf539a0a2884ffab3)
Этот коммит содержится в:
Joseph Schuchart 2020-11-13 12:18:42 +01:00
родитель c614c54818
Коммит b299b491d3
2 изменённых файла: 34 добавления и 39 удалений

Просмотреть файл

@ -528,15 +528,17 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(const void *sbuf, int scount,
- calculate message size and other necessary information */ - calculate message size and other necessary information */
int comsize, i; int comsize, i;
int alg, faninout, segsize, ignoreme; int alg, faninout, segsize, ignoreme;
size_t dsize, total_size; size_t dsize, total_size, per_rank_size;
comsize = ompi_comm_size(comm); comsize = ompi_comm_size(comm);
ompi_datatype_type_size (sdtype, &dsize); ompi_datatype_type_size (sdtype, &dsize);
total_size = 0; total_size = 0;
for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; } for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; }
per_rank_size = total_size / comsize;
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHERV], alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHERV],
total_size, &faninout, &segsize, &ignoreme); per_rank_size, &faninout, &segsize, &ignoreme);
if (alg) { if (alg) {
/* we have found a valid choice from the file based rules for /* we have found a valid choice from the file based rules for
this message size */ this message size */

Просмотреть файл

@ -1245,7 +1245,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
mca_coll_base_module_t *module) mca_coll_base_module_t *module)
{ {
int communicator_size, alg, i; int communicator_size, alg, i;
size_t dsize, total_dsize; size_t dsize, total_dsize, per_rank_dsize;
communicator_size = ompi_comm_size(comm); communicator_size = ompi_comm_size(comm);
@ -1258,6 +1258,9 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
total_dsize = 0; total_dsize = 0;
for (i = 0; i < communicator_size; i++) { total_dsize += dsize * rcounts[i]; } for (i = 0; i < communicator_size; i++) { total_dsize += dsize * rcounts[i]; }
/* use the per-rank data size as basis, similar to allgather */
per_rank_dsize = total_dsize / communicator_size;
/** Algorithms: /** Algorithms:
* {1, "default"}, * {1, "default"},
* {2, "bruck"}, * {2, "bruck"},
@ -1266,97 +1269,87 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(const void *sbuf, int scount,
* {5, "two_proc"}, * {5, "two_proc"},
*/ */
if (communicator_size == 2) { if (communicator_size == 2) {
if (total_dsize < 2048) { if (per_rank_dsize < 2048) {
alg = 3; alg = 3;
} else if (total_dsize < 4096) { } else if (per_rank_dsize < 4096) {
alg = 5; alg = 5;
} else if (total_dsize < 8192) { } else if (per_rank_dsize < 8192) {
alg = 3; alg = 3;
} else { } else {
alg = 5; alg = 5;
} }
} else if (communicator_size < 8) { } else if (communicator_size < 8) {
if (total_dsize < 256) { if (per_rank_dsize < 256) {
alg = 1; alg = 1;
} else if (total_dsize < 4096) { } else if (per_rank_dsize < 4096) {
alg = 4; alg = 4;
} else if (total_dsize < 8192) { } else if (per_rank_dsize < 8192) {
alg = 3; alg = 3;
} else if (total_dsize < 16384) { } else if (per_rank_dsize < 16384) {
alg = 4; alg = 4;
} else if (total_dsize < 262144) { } else if (per_rank_dsize < 262144) {
alg = 2; alg = 2;
} else { } else {
alg = 4; alg = 4;
} }
} else if (communicator_size < 16) { } else if (communicator_size < 16) {
if (total_dsize < 1024) { if (per_rank_dsize < 1024) {
alg = 1; alg = 1;
} else { } else {
alg = 2; alg = 2;
} }
} else if (communicator_size < 32) { } else if (communicator_size < 32) {
if (total_dsize < 32) { if (per_rank_dsize < 128) {
alg = 1; alg = 1;
} else { } else if (per_rank_dsize < 262144) {
alg = 2; alg = 2;
} else {
alg = 3;
} }
} else if (communicator_size < 64) { } else if (communicator_size < 64) {
if (total_dsize < 1024) { if (per_rank_dsize < 256) {
alg = 1;
} else if (per_rank_dsize < 8192) {
alg = 2; alg = 2;
} else if (total_dsize < 2048) {
alg = 4;
} else if (total_dsize < 8192) {
alg = 3;
} else if (total_dsize < 16384) {
alg = 4;
} else if (total_dsize < 32768) {
alg = 3;
} else if (total_dsize < 65536) {
alg = 4;
} else { } else {
alg = 3; alg = 3;
} }
} else if (communicator_size < 128) { } else if (communicator_size < 128) {
if (total_dsize < 16) { if (per_rank_dsize < 256) {
alg = 1; alg = 1;
} else if (total_dsize < 1024) { } else if (per_rank_dsize < 4096) {
alg = 2; alg = 2;
} else if (total_dsize < 65536) {
alg = 4;
} else { } else {
alg = 3; alg = 3;
} }
} else if (communicator_size < 256) { } else if (communicator_size < 256) {
if (total_dsize < 1024) { if (per_rank_dsize < 1024) {
alg = 2; alg = 2;
} else if (total_dsize < 65536) { } else if (per_rank_dsize < 65536) {
alg = 4; alg = 4;
} else { } else {
alg = 3; alg = 3;
} }
} else if (communicator_size < 512) { } else if (communicator_size < 512) {
if (total_dsize < 1024) { if (per_rank_dsize < 1024) {
alg = 2; alg = 2;
} else if (total_dsize < 131072) {
alg = 4;
} else { } else {
alg = 3; alg = 3;
} }
} else if (communicator_size < 1024) { } else if (communicator_size < 1024) {
if (total_dsize < 512) { if (per_rank_dsize < 512) {
alg = 2; alg = 2;
} else if (total_dsize < 1024) { } else if (per_rank_dsize < 1024) {
alg = 1; alg = 1;
} else if (total_dsize < 4096) { } else if (per_rank_dsize < 4096) {
alg = 2; alg = 2;
} else if (total_dsize < 1048576) { } else if (per_rank_dsize < 1048576) {
alg = 4; alg = 4;
} else { } else {
alg = 3; alg = 3;
} }
} else { } else {
if (total_dsize < 4096) { if (per_rank_dsize < 4096) {
alg = 2; alg = 2;
} else { } else {
alg = 4; alg = 4;