
value to signal that the operation of retrieving the element from the free list failed. However, in this case the returned pointer was set to NULL as well, so the error code was redundant. Moreover, it was a continuous source of warnings when picky mode is enabled. The attached patch removes the rc argument from the OMPI_FREE_LIST_GET and OMPI_FREE_LIST_WAIT macros, and changes the callers to check whether the item is NULL instead of using the return code. This commit was SVN r28722.
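For illustration, the change to the calling pattern looks roughly like the sketch below. The "before" form with the rc output argument is reconstructed from the description above rather than copied from the old headers; the "after" form matches the usage in this file.

    /* Before (sketch): failure was reported both through rc and through a NULL item. */
    OMPI_FREE_LIST_WAIT(&ptpcoll_module->collreqs_free, item, rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        return rc;
    }

    /* After: no rc argument; callers test the returned item for NULL. */
    OMPI_FREE_LIST_WAIT(&ptpcoll_module->collreqs_free, item);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }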
/*
 * Copyright (c) 2009-2012 Oak Ridge National Laboratory.  All rights reserved.
 * Copyright (c) 2009-2012 Mellanox Technologies.  All rights reserved.
 * Copyright (c) 2013      The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#include "ompi/include/ompi/constants.h"
#include "ompi/mca/coll/ml/coll_ml.h"
#include "bcol_ptpcoll.h"
#include "bcol_ptpcoll_utils.h"

/*
 * Fanin routines - no user data
 */

/********************************************* New Barrier *********************************************/
/*******************************************************************************************************/
/*******************************************************************************************************/

/*************************************** K-nominal ***************************************/
/*****************************************************************************************/
static int bcol_ptpcoll_barrier_recurs_knomial_new(
        bcol_function_args_t *input_args,
        struct coll_ml_function_t *const_args)
{
    /* local variable */
    uint64_t sequence_number;
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;

    netpatterns_k_exchange_node_t *my_exchange_node =
        &ptpcoll_module->knomial_exchange_tree;

    int rc, k, pair_comm_rank, exchange, completed,
        tree_order = my_exchange_node->tree_order, tag,
        n_extra_sources = my_exchange_node->n_extra_sources,
        n_exchange = my_exchange_node->n_exchanges, num_reqs;

    ompi_communicator_t *comm =
        ptpcoll_module->super.sbgp_partner_module->group_comm;

    int *extra_sources_array = NULL,
        **rank_exchanges = my_exchange_node->rank_exchanges;

    ompi_request_t **requests;
    ompi_free_list_item_t *item;

    mca_bcol_ptpcoll_collreq_t *collreq;

    OMPI_FREE_LIST_WAIT(&ptpcoll_module->collreqs_free, item);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) item;
    input_args->bcol_opaque_data = (void *) collreq;

    requests = collreq->requests;

    /* TAG Calculation */
    sequence_number = input_args->sequence_num;

    /* Keep tag within the limit supported by the pml */
    tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);

    /* Mark this as a collective tag, to avoid conflict with user-level flags */
    tag = -tag;

    if (0 < n_extra_sources) { /* EXCHANGE_NODE case */
        collreq->need_toserv_extra = 1;
        extra_sources_array = my_exchange_node->rank_extra_sources_array;

        /* I will participate in the exchange (of the algorithm) -
         * wait for signal from extra process */
        for (k = 0; k < n_extra_sources; ++k) {
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]];

            rc = MCA_PML_CALL(irecv(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        comm, &(requests[k])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }
        }

        num_reqs = n_extra_sources;

        /* Test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            collreq->tag = tag;
            collreq->num_reqs = num_reqs;
            collreq->exchange = 0;

            return BCOL_FN_STARTED;
        }
    } else {
        collreq->need_toserv_extra = 0;
    }

    /* loop over exchange send/recv pairs */
    for (exchange = 0; exchange < n_exchange; ++exchange) {
        for (k = 0; k < tree_order - 1; ++k) {
            /* rank of exchange partner within the group */
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]];

            assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1));

            /* send to partner - we will wait for completion, as send
             * completion is at the MPI level, and will not
             * incur network level completion costs
             */
            rc = MCA_PML_CALL(isend(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        MCA_PML_BASE_SEND_STANDARD,
                        comm, &(requests[k * 2 + 1])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }

            PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k,
                                 pair_comm_rank, rank_exchanges[exchange][k]));

            /* receive from partner */
            rc = MCA_PML_CALL(irecv(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        comm, &(requests[k * 2])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }

            PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k,
                                 pair_comm_rank, rank_exchanges[exchange][k]));
        }

        num_reqs = 2 * (tree_order - 1);

        /* Test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            collreq->tag = tag;
            collreq->num_reqs = num_reqs;
            collreq->exchange = exchange + 1;

            return BCOL_FN_STARTED;
        }
    }

    /* If non power of 2, may need to send message to "extra" proc */
    if (0 < n_extra_sources) { /* EXCHANGE_NODE case */
        for (k = 0; k < n_extra_sources; ++k) {
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]];

            rc = MCA_PML_CALL(isend(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        MCA_PML_BASE_SEND_STANDARD,
                        comm, &(requests[k])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }
        }

        num_reqs = n_extra_sources;

        /* Test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            collreq->tag = tag;
            collreq->num_reqs = num_reqs;

            collreq->exchange = n_exchange;
            collreq->need_toserv_extra = 0;

            return BCOL_FN_STARTED;
        }
    }

    OMPI_FREE_LIST_RETURN(&ptpcoll_module->collreqs_free, (ompi_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}

static int bcol_ptpcoll_barrier_recurs_knomial_new_progress(
        bcol_function_args_t *input_args,
        struct coll_ml_function_t *const_args)
{
    /* local variable */
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;

    netpatterns_k_exchange_node_t *my_exchange_node =
        &ptpcoll_module->knomial_exchange_tree;

    int rc, k, tag, pair_comm_rank, exchange,
        tree_order = my_exchange_node->tree_order, num_reqs,
        n_exchange = my_exchange_node->n_exchanges, completed,
        n_extra_sources = my_exchange_node->n_extra_sources;

    ompi_communicator_t *comm =
        ptpcoll_module->super.sbgp_partner_module->group_comm;

    int *extra_sources_array,
        **rank_exchanges = my_exchange_node->rank_exchanges;

    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;

    ompi_request_t **requests = collreq->requests;

    num_reqs = collreq->num_reqs;

    /* Test for completion */
    completed =
        mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (!completed) {
        return BCOL_FN_STARTED;
    }

    /* Continue loop over exchange send/recv pairs */
    tag = collreq->tag;

    for (exchange = collreq->exchange; exchange < n_exchange; ++exchange) {
        for (k = 0; k < tree_order - 1; ++k) {
            /* rank of exchange partner within the group */
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]];

            assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1));

            /* send to partner - we will wait for completion, as send
             * completion is at the MPI level, and will not
             * incur network level completion costs
             */
            rc = MCA_PML_CALL(isend(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        MCA_PML_BASE_SEND_STANDARD,
                        comm, &(requests[k * 2 + 1])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }

            PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k,
                                 pair_comm_rank, rank_exchanges[exchange][k]));

            /* receive from partner */
            rc = MCA_PML_CALL(irecv(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        comm, &(requests[k * 2])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }

            PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k,
                                 pair_comm_rank, rank_exchanges[exchange][k]));
        }

        num_reqs = 2 * (tree_order - 1);

        /* Test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            collreq->num_reqs = num_reqs;
            collreq->exchange = exchange + 1;

            return BCOL_FN_STARTED;
        }
    }

    /* If non power of 2, may need to send message to "extra" proc */
    if (collreq->need_toserv_extra) { /* EXCHANGE_NODE case */
        extra_sources_array = my_exchange_node->rank_extra_sources_array;

        for (k = 0; k < n_extra_sources; ++k) {
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]];

            rc = MCA_PML_CALL(isend(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        MCA_PML_BASE_SEND_STANDARD,
                        comm, &(requests[k])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }
        }

        num_reqs = n_extra_sources;

        /* Test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            collreq->num_reqs = num_reqs;
            collreq->exchange = n_exchange;
            collreq->need_toserv_extra = 0;

            return BCOL_FN_STARTED;
        }
    }

    return BCOL_FN_COMPLETE;
}

/****************************************** Extra node Barrier ******************************************/

static int bcol_ptpcoll_barrier_recurs_knomial_extra_new(
        bcol_function_args_t *input_args,
        struct coll_ml_function_t *const_args)
{
    /* local variable */
    uint64_t sequence_number;
    int rc, tag, pair_comm_rank,
        completed, num_reqs = 2;

    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;

    netpatterns_k_exchange_node_t *my_exchange_node =
        &ptpcoll_module->knomial_exchange_tree;

    ompi_communicator_t *comm =
        ptpcoll_module->super.sbgp_partner_module->group_comm;

    int *extra_sources_array = my_exchange_node->rank_extra_sources_array;

    ompi_request_t **requests;
    ompi_free_list_item_t *item;

    mca_bcol_ptpcoll_collreq_t *collreq;

    OMPI_FREE_LIST_WAIT(&ptpcoll_module->collreqs_free, item);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) item;
    input_args->bcol_opaque_data = (void *) collreq;

    requests = collreq->requests;

    /* TAG Calculation */
    sequence_number = input_args->sequence_num;

    /* Keep tag within the limit supported by the pml */
    tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);

    /* Mark this as a collective tag, to avoid conflict with user-level flags */
    tag = -tag;

    pair_comm_rank =
        ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[0]];

    rc = MCA_PML_CALL(isend(
                NULL, 0, MPI_INT,
                pair_comm_rank, tag,
                MCA_PML_BASE_SEND_STANDARD,
                comm, &(requests[0])));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("ISend failed."));
        return rc;
    }

    rc = MCA_PML_CALL(irecv(
                NULL, 0, MPI_INT,
                pair_comm_rank, tag,
                comm, &(requests[1])));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("IRecv failed."));
        return rc;
    }

    /* Test for completion */
    completed =
        mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (!completed) {
        return BCOL_FN_STARTED;
    }

    OMPI_FREE_LIST_RETURN(&ptpcoll_module->collreqs_free, (ompi_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}

/*************************************** Recursive-Doubling ***************************************/
/**************************************************************************************************/

static int bcol_ptpcoll_barrier_recurs_dbl_new(
        bcol_function_args_t *input_args,
        struct coll_ml_function_t *const_args)
{
    /* local variable */
    uint64_t sequence_number;
    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;

    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;

    int rc, my_extra_partner_comm_rank = 0, exchange, completed,
        pair_comm_rank, pair_rank, delta, tag, num_reqs = 0,
        my_rank = ptp_module->super.sbgp_partner_module->my_index,
        n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2;

    ompi_request_t **requests;
    ompi_free_list_item_t *item;

    mca_bcol_ptpcoll_collreq_t *collreq;

    OMPI_FREE_LIST_WAIT(&ptp_module->collreqs_free, item);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) item;
    input_args->bcol_opaque_data = (void *) collreq;

    assert(PTPCOLL_EXTRA != ptp_module->pow_2type);

    requests = collreq->requests;

    /* TAG Calculation */
    sequence_number = input_args->sequence_num;

    /* keep tag within the limit supported by the pml */
    tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask);

    /* mark this as a collective tag, to avoid conflict with user-level flags */
    tag = -tag;

    if (PTPCOLL_PROXY == ptp_module->pow_2type) {
        /* I will participate in the exchange - wait for signal from extra
        ** process */
        /*
         * recv from extra rank - my_extra_partner_comm_rank
         * can use blocking recv, as no other communications
         * need to take place.
         */
        my_extra_partner_comm_rank =
            ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index];

        collreq->need_toserv_extra = 1;
        collreq->extra_partner_rank = my_extra_partner_comm_rank;

        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
                    my_extra_partner_comm_rank, tag, comm,
                    &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }

        completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for irecv failed."));
            return rc;
        }

        if (!completed) {
            collreq->tag = tag;
            collreq->num_reqs = 1;
            collreq->exchange = 0;

            return BCOL_FN_STARTED;
        }
    } else {
        collreq->need_toserv_extra = 0;
    }

    /* Loop over exchange send/recv pairs */
    delta = 1;
    for (exchange = 0; exchange < n_exchange; ++exchange) {

        /* rank of exchange partner within the group */
        pair_rank = my_rank ^ delta;

        /* rank within the communicator */
        pair_comm_rank =
            ptp_module->super.sbgp_partner_module->group_list[pair_rank];

        /* send to partner - we will wait for completion, as send
         * completion is at the MPI level, and will not
         * incur network level completion costs
         */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
                    pair_comm_rank, tag,
                    MCA_PML_BASE_SEND_STANDARD, comm,
                    &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        ++num_reqs;

        /* receive from partner */
        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
                    pair_comm_rank, tag, comm,
                    &(requests[1])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }

        ++num_reqs;

        PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d",
                            exchange, pair_rank, pair_comm_rank));

        /* test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            collreq->tag = tag;
            collreq->num_reqs = num_reqs;

            collreq->exchange = exchange + 1;
            assert(collreq->exchange >= 0);

            return BCOL_FN_STARTED;
        }

        delta <<= 1; /* delta *= 2 */
    }

    if (PTPCOLL_PROXY == ptp_module->pow_2type) {
        /* send - let the extra rank know that we are done */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
                    my_extra_partner_comm_rank, tag,
                    MCA_PML_BASE_SEND_STANDARD, comm,
                    &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for isend failed."));
            return rc;
        }

        if (!completed) {
            collreq->tag = tag;
            collreq->num_reqs = 1;

            collreq->need_toserv_extra = 0;
            collreq->exchange = n_exchange;

            return BCOL_FN_STARTED;
        }
    }

    OMPI_FREE_LIST_RETURN(&ptp_module->collreqs_free, (ompi_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}

static int bcol_ptpcoll_barrier_recurs_dbl_new_progress(
        bcol_function_args_t *input_args,
        struct coll_ml_function_t *const_args)
{
    /* local variable */
    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;

    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;

    int rc, exchange, pair_comm_rank, tag,
        pair_rank, delta, num_reqs, completed,
        my_rank = ptp_module->super.sbgp_partner_module->my_index,
        n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2;

    ompi_request_t **requests;
    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;

    num_reqs = collreq->num_reqs;
    requests = collreq->requests;

    /* test for completion */
    completed =
        mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (!completed) {
        return BCOL_FN_STARTED;
    }

    assert(PTPCOLL_EXTRA != ptp_module->pow_2type);

    /* Continue loop over exchange send/recv pairs */
    num_reqs = 0;
    tag = collreq->tag;

    exchange = collreq->exchange;
    assert(exchange >= 0);

    delta = 1 << exchange;
    for (; exchange < n_exchange; ++exchange) {

        /* rank of exchange partner within the group */
        pair_rank = my_rank ^ delta;

        /* rank within the communicator */
        pair_comm_rank =
            ptp_module->super.sbgp_partner_module->group_list[pair_rank];

        /* send to partner - we will wait for completion, as send
         * completion is at the MPI level, and will not
         * incur network level completion costs
         */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
                    pair_comm_rank, tag,
                    MCA_PML_BASE_SEND_STANDARD, comm,
                    &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        ++num_reqs;

        /* receive from partner */
        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
                    pair_comm_rank, tag, comm,
                    &(requests[1])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }

        ++num_reqs;

        PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d",
                            exchange, pair_rank, pair_comm_rank));

        /* test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }

        if (!completed) {
            collreq->num_reqs = num_reqs;
            collreq->exchange = exchange + 1;
            assert(collreq->exchange >= 0);

            return BCOL_FN_STARTED;
        }

        delta <<= 1; /* delta *= 2 */
    }

    /* if non power of 2, may need to send message to "extra" proc */
    if (collreq->need_toserv_extra) {
        /* send - let the extra rank know that we are done */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
                    collreq->extra_partner_rank, tag,
                    MCA_PML_BASE_SEND_STANDARD, comm,
                    &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for isend failed."));
            return rc;
        }

        if (!completed) {
            collreq->num_reqs = 1;
            collreq->need_toserv_extra = 0;
            collreq->exchange = n_exchange;

            return BCOL_FN_STARTED;
        }
    }

    return BCOL_FN_COMPLETE;
}

/****************************************** Extra node Barrier ******************************************/

static int bcol_ptpcoll_barrier_recurs_dbl_extra_new(
        bcol_function_args_t *input_args,
        struct coll_ml_function_t *const_args)
{
    /* local variable */
    uint64_t sequence_number;
    int rc, completed, num_reqs = 2,
        tag, my_extra_partner_comm_rank;

    ompi_request_t **requests;
    ompi_free_list_item_t *item;

    mca_bcol_ptpcoll_collreq_t *collreq;

    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;

    OMPI_FREE_LIST_WAIT(&ptp_module->collreqs_free, item);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) item;
    input_args->bcol_opaque_data = (void *) collreq;

    requests = collreq->requests;

    /* TAG Calculation */
    sequence_number = input_args->sequence_num;

    /* Keep tag within the limit supported by the pml */
    tag = (PTPCOLL_TAG_OFFSET + sequence_number * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask);

    /* mark this as a collective tag, to avoid conflict with user-level flags */
    tag = -tag;

    /* I will not participate in the exchange - so just "register" as here,
     * signal the extra rank that I am here */

    my_extra_partner_comm_rank =
        ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index];

    rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
                my_extra_partner_comm_rank, tag,
                MCA_PML_BASE_SEND_STANDARD, comm,
                &(requests[0])));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Send failed."));
        return rc;
    }

    /* Recv signal that the rest are done - my_extra_partner_comm_rank */
    rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
                my_extra_partner_comm_rank, tag, comm,
                &(requests[1])));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("IRecv failed."));
        return rc;
    }

    /* Test for completion */
    completed =
        mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (!completed) {
        return BCOL_FN_STARTED;
    }

    OMPI_FREE_LIST_RETURN(&ptp_module->collreqs_free, (ompi_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}

/* We have the same progress func for both cases (R-D and K-Nominal) */
static int bcol_ptpcoll_barrier_extra_node_progress(
        bcol_function_args_t *input_args,
        struct coll_ml_function_t *const_args)
{
    /* local variable */
    ompi_request_t **requests;
    int rc, completed, num_reqs = 2;

    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;

    requests = collreq->requests;

    /* test for completion */
    completed =
        mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (!completed) {
        return BCOL_FN_STARTED;
    }

    return BCOL_FN_COMPLETE;
}

static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type)
{
    netpatterns_k_exchange_node_t *my_exchange_node;
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) super;

    mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
    mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;

    comm_attribs.bcoll_type = bcoll_type;

    comm_attribs.comm_size_min = 0;
    comm_attribs.comm_size_max = 1024 * 1024;
    comm_attribs.waiting_semantics = NON_BLOCKING;

    inv_attribs.bcol_msg_min = 0;
    inv_attribs.bcol_msg_max = 20000; /* range 1 */

    inv_attribs.datatype_bitmap = 0xffffffff;
    inv_attribs.op_types_bitmap = 0xffffffff;

    comm_attribs.data_src = DATA_SRC_KNOWN;

    switch (mca_bcol_ptpcoll_component.barrier_alg) {
    case 1:
        if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                    bcol_ptpcoll_barrier_recurs_dbl_extra_new,
                    bcol_ptpcoll_barrier_extra_node_progress);
            break;
        }

        mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                bcol_ptpcoll_barrier_recurs_dbl_new,
                bcol_ptpcoll_barrier_recurs_dbl_new_progress);
        break;
    case 2:
        my_exchange_node = &ptpcoll_module->knomial_exchange_tree;
        if (my_exchange_node->n_extra_sources > 0 &&
                EXTRA_NODE == my_exchange_node->node_type) {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                    bcol_ptpcoll_barrier_recurs_knomial_extra_new,
                    bcol_ptpcoll_barrier_extra_node_progress);
            break;
        }

        mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                bcol_ptpcoll_barrier_recurs_knomial_new,
                bcol_ptpcoll_barrier_recurs_knomial_new_progress);
        break;
    default:
        PTPCOLL_ERROR(("Wrong barrier_alg flag value."));
    }

    return OMPI_SUCCESS;
}

int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super)
{
    return mca_bcol_ptpcoll_barrier_setup(super, BCOL_SYNC);
}

int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super)
{
    return mca_bcol_ptpcoll_barrier_setup(super, BCOL_BARRIER);
}