COLL/TUNED: Add linear scatter using isend for mlnx platform
Signed-off-by: Mikhail Brinskii <mikhailb@mellanox.com> (cherry picked from commit f2cbd4806e9a38b5e58c0fc69b41624af79fb99b) Signed-off-by: Brian Barrett <bbarrett@amazon.com>
Этот коммит содержится в:
родитель
221fad6862
Коммит
7eb94164a0
@ -10,6 +10,7 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2019 Mellanox Technologies. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -84,4 +85,8 @@ bml_r2_show_unreach_errors = 0
|
||||
coll_tuned_alltoall_large_msg = 250000
|
||||
coll_tuned_alltoall_min_procs = 2048
|
||||
coll_tuned_alltoall_algorithm_max_requests = 8
|
||||
coll_tuned_scatter_intermediate_msg = 8192
|
||||
coll_tuned_scatter_large_msg = 250000
|
||||
coll_tuned_scatter_min_procs = 1048510
|
||||
coll_tuned_scatter_algorithm_max_requests = 64
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2017 FUJITSU LIMITED. All rights reserved.
|
||||
* Copyright (c) 2019 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -291,6 +292,7 @@ int ompi_coll_base_scan_intra_recursivedoubling(SCAN_ARGS);
|
||||
/* Scatter */
|
||||
int ompi_coll_base_scatter_intra_basic_linear(SCATTER_ARGS);
|
||||
int ompi_coll_base_scatter_intra_binomial(SCATTER_ARGS);
|
||||
int ompi_coll_base_scatter_intra_linear_nb(SCATTER_ARGS, int max_reqs);
|
||||
|
||||
/* ScatterV */
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2015-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -273,5 +274,114 @@ ompi_coll_base_scatter_intra_basic_linear(const void *sbuf, int scount,
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/* copied function (with appropriate renaming) ends here */
|
||||
|
||||
/*
|
||||
* Use isends for distributing the data with periodic sync by blocking send.
|
||||
* Blocking send acts like a local resources flush, because it ensures
|
||||
* progression until the message is sent/(copied to some sort of transmit buffer).
|
||||
*/
|
||||
int
|
||||
ompi_coll_base_scatter_intra_linear_nb(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int max_reqs)
|
||||
{
|
||||
int i, rank, size, err, line, nreqs;
|
||||
ptrdiff_t incr;
|
||||
char *ptmp;
|
||||
ompi_request_t **reqs = NULL, **preq;
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
/* If not root, receive data. */
|
||||
if (rank != root) {
|
||||
err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
|
||||
MCA_COLL_BASE_TAG_SCATTER,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if (MPI_SUCCESS != err) {
|
||||
line = __LINE__; goto err_hndl;
|
||||
}
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (max_reqs <= 1) {
|
||||
max_reqs = 0;
|
||||
nreqs = size - 1; /* no send for myself */
|
||||
} else {
|
||||
/* We use blocking MPI_Send (which does not need a request)
|
||||
* every max_reqs send operation (which is size/max_reqs at most),
|
||||
* therefore no need to allocate requests for these sends. */
|
||||
nreqs = size - (size / max_reqs);
|
||||
}
|
||||
|
||||
reqs = ompi_coll_base_comm_get_reqs(module->base_data, nreqs);
|
||||
if (NULL == reqs) {
|
||||
err = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
line = __LINE__; goto err_hndl;
|
||||
}
|
||||
|
||||
err = ompi_datatype_type_extent(sdtype, &incr);
|
||||
if (OMPI_SUCCESS != err) {
|
||||
line = __LINE__; goto err_hndl;
|
||||
}
|
||||
incr *= scount;
|
||||
|
||||
/* I am the root, loop sending data. */
|
||||
for (i = 0, ptmp = (char *)sbuf, preq = reqs; i < size; ++i, ptmp += incr) {
|
||||
/* simple optimization */
|
||||
if (i == rank) {
|
||||
if (MPI_IN_PLACE != rbuf) {
|
||||
err = ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
|
||||
rdtype);
|
||||
}
|
||||
} else {
|
||||
if (!max_reqs || (i % max_reqs)) {
|
||||
err = MCA_PML_CALL(isend(ptmp, scount, sdtype, i,
|
||||
MCA_COLL_BASE_TAG_SCATTER,
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
comm, preq++));
|
||||
} else {
|
||||
err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
|
||||
MCA_COLL_BASE_TAG_SCATTER,
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
comm));
|
||||
}
|
||||
}
|
||||
if (MPI_SUCCESS != err) {
|
||||
line = __LINE__; goto err_hndl;
|
||||
}
|
||||
}
|
||||
|
||||
err = ompi_request_wait_all(preq - reqs, reqs, MPI_STATUSES_IGNORE);
|
||||
if (MPI_SUCCESS != err) {
|
||||
line = __LINE__; goto err_hndl;
|
||||
}
|
||||
|
||||
return MPI_SUCCESS;
|
||||
|
||||
err_hndl:
|
||||
if (NULL != reqs) {
|
||||
/* find a real error code */
|
||||
if (MPI_ERR_IN_STATUS == err) {
|
||||
for (i = 0; i < nreqs; i++) {
|
||||
if (MPI_REQUEST_NULL == reqs[i]) continue;
|
||||
if (MPI_ERR_PENDING == reqs[i]->req_status.MPI_ERROR) continue;
|
||||
err = reqs[i]->req_status.MPI_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ompi_coll_base_free_reqs(reqs, nreqs);
|
||||
}
|
||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
||||
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank));
|
||||
(void)line; /* silence compiler warning */
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -41,6 +42,10 @@ extern int ompi_coll_tuned_alltoall_intermediate_msg;
|
||||
extern int ompi_coll_tuned_alltoall_large_msg;
|
||||
extern int ompi_coll_tuned_alltoall_min_procs;
|
||||
extern int ompi_coll_tuned_alltoall_max_requests;
|
||||
extern int ompi_coll_tuned_scatter_intermediate_msg;
|
||||
extern int ompi_coll_tuned_scatter_large_msg;
|
||||
extern int ompi_coll_tuned_scatter_min_procs;
|
||||
extern int ompi_coll_tuned_scatter_blocking_send_ratio;
|
||||
|
||||
/* forced algorithm choices */
|
||||
/* this structure is for storing the indexes to the forced algorithm mca params... */
|
||||
|
@ -16,6 +16,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -64,6 +65,12 @@ int ompi_coll_tuned_alltoall_large_msg = 3000;
|
||||
int ompi_coll_tuned_alltoall_min_procs = 0; /* disable by default */
|
||||
int ompi_coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
|
||||
|
||||
/* Non-blocking linear scatter tuning; the zero defaults keep the fixed decision function from selecting it */
|
||||
int ompi_coll_tuned_scatter_intermediate_msg = 0;
|
||||
int ompi_coll_tuned_scatter_large_msg = 0;
|
||||
int ompi_coll_tuned_scatter_min_procs = 0;
|
||||
int ompi_coll_tuned_scatter_blocking_send_ratio = 0;
|
||||
|
||||
/* forced algorithm variables */
|
||||
/* indices for the MCA parameters */
|
||||
coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}};
|
||||
|
@ -15,6 +15,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -780,6 +781,7 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(const void *sbuf, int scount,
|
||||
{
|
||||
const size_t small_block_size = 300;
|
||||
const int small_comm_size = 10;
|
||||
const int intermediate_comm_size = 64;
|
||||
int communicator_size, rank;
|
||||
size_t dsize, block_size;
|
||||
|
||||
@ -802,7 +804,16 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(const void *sbuf, int scount,
|
||||
return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
} else if ((communicator_size < ompi_coll_tuned_scatter_min_procs) &&
|
||||
(communicator_size > intermediate_comm_size) &&
|
||||
(block_size >= ompi_coll_tuned_scatter_intermediate_msg) &&
|
||||
(block_size < ompi_coll_tuned_scatter_large_msg)) {
|
||||
return ompi_coll_base_scatter_intra_linear_nb(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module,
|
||||
ompi_coll_tuned_scatter_blocking_send_ratio);
|
||||
}
|
||||
|
||||
return ompi_coll_base_scatter_intra_basic_linear(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
|
@ -5,6 +5,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2019 Mellanox Technologies. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -36,6 +37,7 @@ static mca_base_var_enum_value_t scatter_algorithms[] = {
|
||||
{0, "ignore"},
|
||||
{1, "basic_linear"},
|
||||
{2, "binomial"},
|
||||
{3, "linear_nb"},
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
@ -74,7 +76,7 @@ ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_p
|
||||
mca_param_indices->algorithm_param_index =
|
||||
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||
"scatter_algorithm",
|
||||
"Which scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial.",
|
||||
"Which scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 non-blocking linear.",
|
||||
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_ALL,
|
||||
@ -114,6 +116,38 @@ ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_p
|
||||
MCA_BASE_VAR_SCOPE_ALL,
|
||||
&coll_tuned_scatter_chain_fanout);
|
||||
|
||||
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||
"scatter_min_procs",
|
||||
"use basic linear algorithm for communicators larger than this value",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_6,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_coll_tuned_scatter_min_procs);
|
||||
|
||||
(void)mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||
"scatter_algorithm_max_requests",
|
||||
"Issue a blocking send every this many non-blocking requests. Only has meaning for non-blocking linear algorithm.",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_ALL,
|
||||
&ompi_coll_tuned_scatter_blocking_send_ratio);
|
||||
|
||||
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||
"scatter_intermediate_msg",
|
||||
"use non-blocking linear algorithm for messages larger than this value",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_6,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_coll_tuned_scatter_intermediate_msg);
|
||||
|
||||
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||
"scatter_large_msg",
|
||||
"use linear algorithm for messages larger than this value",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_6,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_coll_tuned_scatter_large_msg);
|
||||
|
||||
return (MPI_SUCCESS);
|
||||
}
|
||||
|
||||
@ -144,6 +178,11 @@ ompi_coll_tuned_scatter_intra_do_this(const void *sbuf, int scount,
|
||||
return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module);
|
||||
case (3):
|
||||
return ompi_coll_base_scatter_intra_linear_nb(sbuf, scount, sdtype,
|
||||
rbuf, rcount, rdtype,
|
||||
root, comm, module,
|
||||
ompi_coll_tuned_scatter_blocking_send_ratio);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream,
|
||||
"coll:tuned:scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user