/*
 * openmpi/ompi/mca/bcol/ptpcoll/bcol_ptpcoll_barrier.c
 * (C source, 934 lines, 31 KiB)
 */

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/include/ompi/constants.h"
#include "bcol_ptpcoll.h"
#include "bcol_ptpcoll_utils.h"
/*
* Fanin routines - no user data
*/
/********************************************* New Barrier *********************************************/
/*******************************************************************************************************/
/*******************************************************************************************************/
/*************************************** K-nomial ***************************************/
/*****************************************************************************************/
/**
 * Start a recursive K-nomial barrier on a rank that takes part in the
 * exchange pattern.
 *
 * A collective request object is taken from the module free list and
 * published through input_args->bcol_opaque_data.  All messages are
 * zero-byte sends/receives.  The function walks through three phases:
 *   1. post a receive from every "extra" source attached to this rank
 *      (only when n_extra_sources > 0);
 *   2. for every exchange step, post a send to and a receive from each of
 *      the (tree_order - 1) partners of that step;
 *   3. post a send back to every "extra" source.
 * After each phase the posted requests are tested.  If they have not all
 * completed, the resume state (tag, num_reqs, exchange) is stored in the
 * request object and BCOL_FN_STARTED is returned.
 *
 * @param input_args  per-call arguments; sequence_num is read and
 *                    bcol_opaque_data is written
 * @param const_args  provides the ptpcoll module through bcol_module
 *
 * @return BCOL_FN_COMPLETE when every phase finished in this call (the
 *         request object is handed back to the free list first),
 *         BCOL_FN_STARTED when the barrier is still in flight,
 *         OMPI_ERR_OUT_OF_RESOURCE when no request object was obtained,
 *         or the error code of the failing PML / test call.
 */
static int bcol_ptpcoll_barrier_recurs_knomial_new(
        bcol_function_args_t *input_args,
        struct mca_bcol_base_function_t *const_args)
{
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    netpatterns_k_exchange_node_t *my_exchange_node =
        &ptpcoll_module->knomial_exchange_tree;
    ompi_communicator_t *comm =
        ptpcoll_module->super.sbgp_partner_module->group_comm;
    const int radix = my_exchange_node->tree_order;
    const int n_extra = my_exchange_node->n_extra_sources;
    const int n_steps = my_exchange_node->n_exchanges;
    int **step_partners = my_exchange_node->rank_exchanges;
    int *extra_ranks = NULL;
    mca_bcol_ptpcoll_collreq_t *collreq;
    ompi_request_t **requests;
    ompi_request_t **slot;
    opal_free_list_item_t *item;
    uint64_t seq;
    int tag;
    int rc;
    int done;
    int pending;
    int peer;
    int step;
    int i;

    /* Grab the per-collective request object */
    item = opal_free_list_wait (&ptpcoll_module->collreqs_free);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) item;
    input_args->bcol_opaque_data = (void *) collreq;
    requests = collreq->requests;

    /* Tag: derived from the sequence number and kept within the limit
     * supported by the pml ... */
    seq = input_args->sequence_num;
    tag = (PTPCOLL_TAG_OFFSET + seq * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
    /* ... then negated to mark it as a collective tag, to avoid conflict
     * with user-level flags */
    tag = -tag;

    /* Phase 1: wait for the signal of every extra process */
    if (n_extra <= 0) {
        collreq->need_toserv_extra = 0;
    } else {
        /* EXCHANGE_NODE case */
        collreq->need_toserv_extra = 1;
        extra_ranks = my_exchange_node->rank_extra_sources_array;

        for (i = 0; i < n_extra; ++i) {
            peer = ptpcoll_module->super.sbgp_partner_module->group_list[extra_ranks[i]];
            rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, peer, tag,
                                    comm, &(requests[i])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }
        }

        pending = n_extra;
        done = mca_bcol_ptpcoll_test_all_for_match(&pending, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }
        if (!done) {
            /* resume from the very first exchange step */
            collreq->tag = tag;
            collreq->num_reqs = pending;
            collreq->exchange = 0;
            return BCOL_FN_STARTED;
        }
    }

    /* Phase 2: the exchange steps */
    for (step = 0; step < n_steps; ++step) {
        for (i = 0; i + 1 < radix; ++i) {
            /* communicator rank of the i-th partner of this step */
            peer = ptpcoll_module->super.sbgp_partner_module->group_list[step_partners[step][i]];
            assert(2 * ptpcoll_module->k_nomial_radix > (i * 2 + 1));

            /* even slot holds the receive, odd slot holds the send */
            slot = &requests[i * 2];

            /* send to partner - send completion is at the MPI level and
             * will not incur network level completion costs */
            rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, peer, tag,
                                    MCA_PML_BASE_SEND_STANDARD,
                                    comm, &(slot[1])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }
            PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", step, i,
                                 peer, step_partners[step][i]));

            /* receive from partner */
            rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, peer, tag,
                                    comm, &(slot[0])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }
            PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", step, i,
                                 peer, step_partners[step][i]));
        }

        pending = 2 * (radix - 1);
        done = mca_bcol_ptpcoll_test_all_for_match(&pending, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }
        if (!done) {
            /* this step is posted; resume with the next one */
            collreq->tag = tag;
            collreq->num_reqs = pending;
            collreq->exchange = step + 1;
            return BCOL_FN_STARTED;
        }
    }

    /* Phase 3: if non power of 2, may need to send message to "extra" proc */
    if (n_extra > 0) {
        /* EXCHANGE_NODE case */
        for (i = 0; i < n_extra; ++i) {
            peer = ptpcoll_module->super.sbgp_partner_module->group_list[extra_ranks[i]];
            rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, peer, tag,
                                    MCA_PML_BASE_SEND_STANDARD,
                                    comm, &(requests[i])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }
        }

        pending = n_extra;
        done = mca_bcol_ptpcoll_test_all_for_match(&pending, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }
        if (!done) {
            /* nothing is left to post once these sends complete */
            collreq->tag = tag;
            collreq->num_reqs = pending;
            collreq->exchange = n_steps;
            collreq->need_toserv_extra = 0;
            return BCOL_FN_STARTED;
        }
    }

    opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}
/**
 * Progress a recursive K-nomial barrier that was left in flight.
 *
 * The collective request object is read from input_args->bcol_opaque_data.
 * The requests recorded in it (num_reqs of them) are tested first; while
 * they are incomplete the function returns BCOL_FN_STARTED without posting
 * anything new.  Once they are complete, the exchange loop continues from
 * collreq->exchange and, if collreq->need_toserv_extra is still set, the
 * zero-byte release messages are sent to the "extra" sources.
 *
 * When the barrier completes, the request object is handed back to the
 * module's collreqs_free list, mirroring the completion path of the
 * function that started the barrier; without this the object obtained
 * there would never be released on the deferred-completion path.
 *
 * @param input_args  per-call arguments; bcol_opaque_data holds the
 *                    collective request object
 * @param const_args  provides the ptpcoll module through bcol_module
 *
 * @return BCOL_FN_COMPLETE when the barrier is finished, BCOL_FN_STARTED
 *         when requests are still pending, or the error code of the
 *         failing PML / test call.
 */
static int bcol_ptpcoll_barrier_recurs_knomial_new_progress(
        bcol_function_args_t *input_args,
        struct mca_bcol_base_function_t *const_args)
{
    /* local variable */
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    netpatterns_k_exchange_node_t *my_exchange_node =
        &ptpcoll_module->knomial_exchange_tree;
    int rc, k, tag, pair_comm_rank, exchange,
        tree_order = my_exchange_node->tree_order, num_reqs,
        n_exchange = my_exchange_node->n_exchanges, completed,
        n_extra_sources = my_exchange_node->n_extra_sources;
    ompi_communicator_t *comm =
        ptpcoll_module->super.sbgp_partner_module->group_comm;
    int *extra_sources_array,
        **rank_exchanges = my_exchange_node->rank_exchanges;
    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;
    ompi_request_t **requests = collreq->requests;

    num_reqs = collreq->num_reqs;

    /* Test the requests left pending by the previous call */
    completed =
        mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }
    if (!completed) {
        return BCOL_FN_STARTED;
    }

    /* Continue loop over exchange send/recv pairs */
    tag = collreq->tag;
    for (exchange = collreq->exchange; exchange < n_exchange; ++exchange) {
        for (k = 0; k < tree_order - 1; ++k) {
            /* rank of exchange partner within the group */
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[rank_exchanges[exchange][k]];
            assert(2 * ptpcoll_module->k_nomial_radix > (k * 2 + 1));

            /* send to partner - we will wait for completion, as send
             * completion is at the MPI level, and will not
             * incur network level completion costs
             */
            rc = MCA_PML_CALL(isend(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        MCA_PML_BASE_SEND_STANDARD,
                        comm, &(requests[k * 2 + 1])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }
            PTPCOLL_VERBOSE(10, ("Ex %d, K %d send to %d[%d]", exchange, k,
                                 pair_comm_rank, rank_exchanges[exchange][k]));

            /* receive from partner */
            rc = MCA_PML_CALL(irecv(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        comm, &(requests[k * 2])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("IRecv failed."));
                return rc;
            }
            PTPCOLL_VERBOSE(10, ("Ex %d, K %d irecv from %d[%d]", exchange, k,
                                 pair_comm_rank, rank_exchanges[exchange][k]));
        }

        num_reqs = 2 * (tree_order - 1);

        /* Test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }
        if (!completed) {
            /* this step is posted; the next call resumes with the next one */
            collreq->num_reqs = num_reqs;
            collreq->exchange = exchange + 1;
            return BCOL_FN_STARTED;
        }
    }

    /* If non power of 2, may need to send message to "extra" proc */
    if (collreq->need_toserv_extra) { /* EXCHANGE_NODE case */
        extra_sources_array = my_exchange_node->rank_extra_sources_array;
        for (k = 0; k < n_extra_sources; ++k) {
            pair_comm_rank =
                ptpcoll_module->super.sbgp_partner_module->group_list[extra_sources_array[k]];
            rc = MCA_PML_CALL(isend(
                        NULL, 0, MPI_INT,
                        pair_comm_rank, tag,
                        MCA_PML_BASE_SEND_STANDARD,
                        comm, &(requests[k])));
            if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
                PTPCOLL_ERROR(("ISend failed."));
                return rc;
            }
        }

        num_reqs = n_extra_sources;

        /* Test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }
        if (!completed) {
            /* clear the flag so the next call does not send again */
            collreq->num_reqs = num_reqs;
            collreq->exchange = n_exchange;
            collreq->need_toserv_extra = 0;
            return BCOL_FN_STARTED;
        }
    }

    /* Barrier finished: release the request object taken when it started */
    opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}
/****************************************** Extra node Barrier ******************************************/
/**
 * Start the K-nomial barrier on an "extra" rank.
 *
 * A collective request object is taken from the module free list and
 * published through input_args->bcol_opaque_data.  The rank posts one
 * zero-byte send to and one zero-byte receive from the rank listed first
 * in rank_extra_sources_array (requests[0] and requests[1]), then tests
 * both.
 *
 * @param input_args  per-call arguments; sequence_num is read and
 *                    bcol_opaque_data is written
 * @param const_args  provides the ptpcoll module through bcol_module
 *
 * @return BCOL_FN_COMPLETE when both requests completed in this call (the
 *         request object is handed back to the free list first),
 *         BCOL_FN_STARTED when they are still pending,
 *         OMPI_ERR_OUT_OF_RESOURCE when no request object was obtained,
 *         or the error code of the failing PML / test call.
 */
static int bcol_ptpcoll_barrier_recurs_knomial_extra_new(
        bcol_function_args_t *input_args,
        struct mca_bcol_base_function_t *const_args)
{
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    netpatterns_k_exchange_node_t *my_exchange_node =
        &ptpcoll_module->knomial_exchange_tree;
    ompi_communicator_t *comm =
        ptpcoll_module->super.sbgp_partner_module->group_comm;
    int *extra_ranks = my_exchange_node->rank_extra_sources_array;
    mca_bcol_ptpcoll_collreq_t *collreq;
    ompi_request_t **requests;
    opal_free_list_item_t *item;
    uint64_t seq;
    int pending = 2;
    int tag;
    int peer;
    int done;
    int rc;

    item = opal_free_list_wait (&ptpcoll_module->collreqs_free);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) item;
    input_args->bcol_opaque_data = (void *) collreq;
    requests = collreq->requests;

    /* Tag: derived from the sequence number, kept within the limit
     * supported by the pml, then negated to mark it as a collective tag
     * (avoids conflict with user-level flags) */
    seq = input_args->sequence_num;
    tag = (PTPCOLL_TAG_OFFSET + seq * PTPCOLL_TAG_FACTOR) & (ptpcoll_module->tag_mask);
    tag = -tag;

    peer = ptpcoll_module->super.sbgp_partner_module->group_list[extra_ranks[0]];

    /* requests[0]: send to the peer */
    rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, peer, tag,
                            MCA_PML_BASE_SEND_STANDARD,
                            comm, &(requests[0])));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("ISend failed."));
        return rc;
    }

    /* requests[1]: receive from the same peer */
    rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, peer, tag,
                            comm, &(requests[1])));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("IRecv failed."));
        return rc;
    }

    done = mca_bcol_ptpcoll_test_all_for_match(&pending, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (done) {
        opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq);
        return BCOL_FN_COMPLETE;
    }

    return BCOL_FN_STARTED;
}
/*************************************** Recursive-Doubling ***************************************/
/**************************************************************************************************/
/**
 * Start a recursive-doubling barrier on a rank that takes part in the
 * exchange (the module's pow_2type must not be PTPCOLL_EXTRA).
 *
 * A collective request object is taken from the module free list and
 * published through input_args->bcol_opaque_data.  All messages are
 * zero-byte sends/receives.  Phases:
 *   1. a PTPCOLL_PROXY rank first posts a receive from its extra partner;
 *   2. for exchange step e in [0, n_levels_pow2) the rank exchanges a
 *      message with group rank (my_index XOR 2^e);
 *   3. a PTPCOLL_PROXY rank finally sends a message to its extra partner.
 * Whenever the posted requests have not completed, the resume state is
 * stored in the request object and BCOL_FN_STARTED is returned.
 *
 * @param input_args  per-call arguments; sequence_num is read and
 *                    bcol_opaque_data is written
 * @param const_args  provides the ptpcoll module through bcol_module
 *
 * @return BCOL_FN_COMPLETE when every phase finished in this call (the
 *         request object is handed back to the free list first),
 *         BCOL_FN_STARTED when the barrier is still in flight,
 *         OMPI_ERR_OUT_OF_RESOURCE when no request object was obtained,
 *         or the error code of the failing PML / test call.
 */
static int bcol_ptpcoll_barrier_recurs_dbl_new(
        bcol_function_args_t *input_args,
        struct mca_bcol_base_function_t *const_args)
{
    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
    const int my_rank = ptp_module->super.sbgp_partner_module->my_index;
    const int n_steps = ptp_module->super.sbgp_partner_module->n_levels_pow2;
    mca_bcol_ptpcoll_collreq_t *collreq;
    ompi_request_t **requests;
    opal_free_list_item_t *item;
    uint64_t seq;
    int extra_peer = 0;
    int pending = 0;
    int tag;
    int step;
    int peer_index;
    int peer;
    int done;
    int rc;

    item = opal_free_list_wait (&ptp_module->collreqs_free);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) item;
    input_args->bcol_opaque_data = (void *) collreq;
    assert(PTPCOLL_EXTRA != ptp_module->pow_2type);
    requests = collreq->requests;

    /* Tag: derived from the sequence number, kept within the limit
     * supported by the pml, then negated to mark it as a collective tag
     * (avoids conflict with user-level flags) */
    seq = input_args->sequence_num;
    tag = (PTPCOLL_TAG_OFFSET + seq * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask);
    tag = -tag;

    /* Phase 1: a proxy waits for the signal of its extra process */
    if (PTPCOLL_PROXY != ptp_module->pow_2type) {
        collreq->need_toserv_extra = 0;
    } else {
        extra_peer =
            ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index];
        collreq->need_toserv_extra = 1;
        collreq->extra_partner_rank = extra_peer;

        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, extra_peer, tag, comm,
                                &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }

        done = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for irecv failed."));
            return rc;
        }
        if (!done) {
            /* resume from the very first exchange step */
            collreq->tag = tag;
            collreq->num_reqs = 1;
            collreq->exchange = 0;
            return BCOL_FN_STARTED;
        }
    }

    /* Phase 2: the exchange steps */
    for (step = 0; step < n_steps; ++step) {
        /* partner index within the group, then its communicator rank */
        peer_index = my_rank ^ (1 << step);
        peer = ptp_module->super.sbgp_partner_module->group_list[peer_index];

        /* send to partner - send completion is at the MPI level and will
         * not incur network level completion costs */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, peer, tag,
                                MCA_PML_BASE_SEND_STANDARD, comm,
                                &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        /* receive from partner */
        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, peer, tag, comm,
                                &(requests[1])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }

        /* account for the send and the receive just posted */
        pending += 2;

        PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d",
                            step, peer_index, peer));

        done = mca_bcol_ptpcoll_test_all_for_match(&pending, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }
        if (!done) {
            /* this step is posted; resume with the next one */
            collreq->tag = tag;
            collreq->num_reqs = pending;
            collreq->exchange = step + 1;
            assert(collreq->exchange >= 0);
            return BCOL_FN_STARTED;
        }
    }

    /* Phase 3: a proxy lets its extra rank know that we are done */
    if (PTPCOLL_PROXY == ptp_module->pow_2type) {
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, extra_peer, tag,
                                MCA_PML_BASE_SEND_STANDARD, comm,
                                &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        done = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for isend failed."));
            return rc;
        }
        if (!done) {
            /* nothing is left to post once this send completes */
            collreq->tag = tag;
            collreq->num_reqs = 1;
            collreq->need_toserv_extra = 0;
            collreq->exchange = n_steps;
            return BCOL_FN_STARTED;
        }
    }

    opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}
/**
 * Progress a recursive-doubling barrier that was left in flight.
 *
 * The collective request object is read from input_args->bcol_opaque_data.
 * The requests recorded in it (num_reqs of them) are tested first; while
 * they are incomplete the function returns BCOL_FN_STARTED without posting
 * anything new.  Once they are complete, the exchange loop continues from
 * collreq->exchange and, if collreq->need_toserv_extra is still set, a
 * zero-byte message is sent to collreq->extra_partner_rank.
 *
 * When the barrier completes, the request object is handed back to the
 * module's collreqs_free list, mirroring the completion path of the
 * function that started the barrier; without this the object obtained
 * there would never be released on the deferred-completion path.
 *
 * @param input_args  per-call arguments; bcol_opaque_data holds the
 *                    collective request object
 * @param const_args  provides the ptpcoll module through bcol_module
 *
 * @return BCOL_FN_COMPLETE when the barrier is finished, BCOL_FN_STARTED
 *         when requests are still pending, or the error code of the
 *         failing PML / test call.
 */
static int bcol_ptpcoll_barrier_recurs_dbl_new_progress(
        bcol_function_args_t *input_args,
        struct mca_bcol_base_function_t *const_args)
{
    /* local variable */
    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
    int rc, exchange, pair_comm_rank, tag,
        pair_rank, delta, num_reqs, completed,
        my_rank = ptp_module->super.sbgp_partner_module->my_index,
        n_exchange = ptp_module->super.sbgp_partner_module->n_levels_pow2;
    ompi_request_t **requests;
    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;

    num_reqs = collreq->num_reqs;
    requests = collreq->requests;

    /* test the requests left pending by the previous call */
    completed =
        mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }
    if (!completed) {
        return BCOL_FN_STARTED;
    }

    assert(PTPCOLL_EXTRA != ptp_module->pow_2type);

    /* Continue loop over exchange send/recv pairs */
    num_reqs = 0;
    tag = collreq->tag;
    exchange = collreq->exchange;
    assert(exchange >= 0);
    /* partner distance for the step we resume at */
    delta = 1 << exchange;

    for (; exchange < n_exchange; ++exchange) {
        /* rank of exchange partner within the group */
        pair_rank = my_rank ^ delta;
        /* rank within the communicator */
        pair_comm_rank =
            ptp_module->super.sbgp_partner_module->group_list[pair_rank];

        /* send to partner - we will wait for completion, as send
         * completion is at the MPI level, and will not
         * incur network level completion costs
         */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
                                pair_comm_rank, tag,
                                MCA_PML_BASE_SEND_STANDARD, comm,
                                &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }
        ++num_reqs;

        /* receive from partner */
        rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT,
                                pair_comm_rank, tag, comm,
                                &(requests[1])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("IRecv failed."));
            return rc;
        }
        ++num_reqs;

        PTPCOLL_VERBOSE(5, ("exchange - %d, pair_rank - %d, pair_comm_rank - %d",
                            exchange, pair_rank, pair_comm_rank));

        /* test for completion */
        completed =
            mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for all failed."));
            return rc;
        }
        if (!completed) {
            /* this step is posted; the next call resumes with the next one */
            collreq->num_reqs = num_reqs;
            collreq->exchange = exchange + 1;
            assert(collreq->exchange >= 0);
            return BCOL_FN_STARTED;
        }

        delta <<= 1; /* delta *= 2 */
    }

    /* if non power of 2, may need to send message to "extra" proc */
    if (collreq->need_toserv_extra) {
        /* send - let the extra rank know that we are done */
        rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT,
                                collreq->extra_partner_rank, tag,
                                MCA_PML_BASE_SEND_STANDARD, comm,
                                &(requests[0])));
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("ISend failed."));
            return rc;
        }

        completed = mca_bcol_ptpcoll_test_for_match(&requests[0], &rc);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
            PTPCOLL_ERROR(("Test for isend failed."));
            return rc;
        }
        if (!completed) {
            /* clear the flag so the next call does not send again */
            collreq->num_reqs = 1;
            collreq->need_toserv_extra = 0;
            collreq->exchange = n_exchange;
            return BCOL_FN_STARTED;
        }
    }

    /* Barrier finished: release the request object taken when it started */
    opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}
/****************************************** Extra node Barrier ******************************************/
/**
 * Start the recursive-doubling barrier on an "extra" rank.
 *
 * The rank does not take part in the exchange itself.  It takes a
 * collective request object from the module free list, publishes it
 * through input_args->bcol_opaque_data, posts one zero-byte send to and
 * one zero-byte receive from the group member at proxy_extra_index
 * (requests[0] and requests[1]), then tests both.
 *
 * @param input_args  per-call arguments; sequence_num is read and
 *                    bcol_opaque_data is written
 * @param const_args  provides the ptpcoll module through bcol_module
 *
 * @return BCOL_FN_COMPLETE when both requests completed in this call (the
 *         request object is handed back to the free list first),
 *         BCOL_FN_STARTED when they are still pending,
 *         OMPI_ERR_OUT_OF_RESOURCE when no request object was obtained,
 *         or the error code of the failing PML / test call.
 */
static int bcol_ptpcoll_barrier_recurs_dbl_extra_new(
        bcol_function_args_t *input_args,
        struct mca_bcol_base_function_t *const_args)
{
    mca_bcol_ptpcoll_module_t *ptp_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    ompi_communicator_t *comm = ptp_module->super.sbgp_partner_module->group_comm;
    mca_bcol_ptpcoll_collreq_t *collreq;
    ompi_request_t **requests;
    opal_free_list_item_t *item;
    uint64_t seq;
    int pending = 2;
    int tag;
    int proxy;
    int done;
    int rc;

    item = opal_free_list_wait (&ptp_module->collreqs_free);
    if (OPAL_UNLIKELY(NULL == item)) {
        PTPCOLL_ERROR(("Free list waiting failed."));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    collreq = (mca_bcol_ptpcoll_collreq_t *) item;
    input_args->bcol_opaque_data = (void *) collreq;
    requests = collreq->requests;

    /* Tag: derived from the sequence number, kept within the limit
     * supported by the pml, then negated to mark it as a collective tag
     * (avoids conflict with user-level flags) */
    seq = input_args->sequence_num;
    tag = (PTPCOLL_TAG_OFFSET + seq * PTPCOLL_TAG_FACTOR) & (ptp_module->tag_mask);
    tag = -tag;

    /* communicator rank of the partner this rank signals */
    proxy = ptp_module->super.sbgp_partner_module->group_list[ptp_module->proxy_extra_index];

    /* requests[0]: "register" as here by signalling the partner */
    rc = MCA_PML_CALL(isend(NULL, 0, MPI_INT, proxy, tag,
                            MCA_PML_BASE_SEND_STANDARD, comm,
                            &(requests[0])));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Send failed."));
        return rc;
    }

    /* requests[1]: signal that the rest are done */
    rc = MCA_PML_CALL(irecv(NULL, 0, MPI_INT, proxy, tag, comm,
                            &(requests[1])));
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("IRecv failed."));
        return rc;
    }

    done = mca_bcol_ptpcoll_test_all_for_match(&pending, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }

    if (done) {
        opal_free_list_return (&ptp_module->collreqs_free, (opal_free_list_item_t *) collreq);
        return BCOL_FN_COMPLETE;
    }

    return BCOL_FN_STARTED;
}
/**
 * Progress function shared by the "extra" rank of both barrier flavours
 * (recursive doubling and K-nomial).
 *
 * Tests the two requests stored in the collective request object found in
 * input_args->bcol_opaque_data.  When both have completed, the request
 * object is handed back to the module's collreqs_free list, mirroring the
 * completion path of the functions that start the barrier; without this
 * the object obtained there would never be released on the
 * deferred-completion path.
 *
 * @param input_args  per-call arguments; bcol_opaque_data holds the
 *                    collective request object
 * @param const_args  provides the ptpcoll module through bcol_module
 *
 * @return BCOL_FN_COMPLETE when both requests completed, BCOL_FN_STARTED
 *         when they are still pending, or the error code reported by the
 *         test call.
 */
static int bcol_ptpcoll_barrier_extra_node_progress(
        bcol_function_args_t *input_args,
        struct mca_bcol_base_function_t *const_args)
{
    /* local variable */
    ompi_request_t **requests;
    int rc, completed, num_reqs = 2;
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) const_args->bcol_module;
    mca_bcol_ptpcoll_collreq_t *collreq =
        (mca_bcol_ptpcoll_collreq_t *) input_args->bcol_opaque_data;

    requests = collreq->requests;

    /* test for completion */
    completed =
        mca_bcol_ptpcoll_test_all_for_match(&num_reqs, requests, &rc);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        PTPCOLL_ERROR(("Test for all failed."));
        return rc;
    }
    if (!completed) {
        return BCOL_FN_STARTED;
    }

    /* Barrier finished: release the request object taken when it started */
    opal_free_list_return (&ptpcoll_module->collreqs_free, (opal_free_list_item_t *) collreq);
    return BCOL_FN_COMPLETE;
}
/**
 * Register the barrier start/progress function pair for a module.
 *
 * The pair is selected by mca_bcol_ptpcoll_component.barrier_alg:
 *   1 - recursive doubling; a module whose pow_2type is PTPCOLL_EXTRA gets
 *       the extra-rank start function and the shared extra-rank progress
 *       function;
 *   2 - recursive K-nomial; a module whose exchange node is an EXTRA_NODE
 *       with at least one extra source gets the extra-rank functions.
 * Any other value only logs an error and registers nothing.
 *
 * @param super       module to register the functions on
 * @param bcoll_type  collective type stored in the communication
 *                    attributes of the registration
 *
 * @return OMPI_SUCCESS in every case, including an unknown barrier_alg.
 */
static int mca_bcol_ptpcoll_barrier_setup(mca_bcol_base_module_t *super, int bcoll_type)
{
    mca_bcol_ptpcoll_module_t *ptpcoll_module =
        (mca_bcol_ptpcoll_module_t *) super;
    netpatterns_k_exchange_node_t *my_exchange_node;
    mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
    mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;

    /* communication attributes */
    comm_attribs.bcoll_type = bcoll_type;
    comm_attribs.comm_size_min = 0;
    comm_attribs.comm_size_max = 1024 * 1024;
    comm_attribs.waiting_semantics = NON_BLOCKING;
    comm_attribs.data_src = DATA_SRC_KNOWN;

    /* invocation attributes */
    inv_attribs.bcol_msg_min = 0;
    inv_attribs.bcol_msg_max = 20000; /* range 1 */
    inv_attribs.datatype_bitmap = 0xffffffff;
    inv_attribs.op_types_bitmap = 0xffffffff;

    switch (mca_bcol_ptpcoll_component.barrier_alg) {
    case 1:
        /* recursive doubling */
        if (PTPCOLL_EXTRA == ptpcoll_module->pow_2type) {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                                         bcol_ptpcoll_barrier_recurs_dbl_extra_new,
                                         bcol_ptpcoll_barrier_extra_node_progress);
        } else {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                                         bcol_ptpcoll_barrier_recurs_dbl_new,
                                         bcol_ptpcoll_barrier_recurs_dbl_new_progress);
        }
        break;

    case 2:
        /* recursive K-nomial */
        my_exchange_node = &ptpcoll_module->knomial_exchange_tree;
        if (my_exchange_node->n_extra_sources > 0 &&
            EXTRA_NODE == my_exchange_node->node_type) {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                                         bcol_ptpcoll_barrier_recurs_knomial_extra_new,
                                         bcol_ptpcoll_barrier_extra_node_progress);
        } else {
            mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
                                         bcol_ptpcoll_barrier_recurs_knomial_new,
                                         bcol_ptpcoll_barrier_recurs_knomial_new_progress);
        }
        break;

    default:
        PTPCOLL_ERROR(("Wrong barrier_alg flag value."));
        break;
    }

    return OMPI_SUCCESS;
}
/**
 * Register the barrier functions of this component under the BCOL_SYNC
 * collective type.
 *
 * @param super  module to register the functions on
 *
 * @return the value returned by mca_bcol_ptpcoll_barrier_setup().
 */
int mca_bcol_ptpcoll_memsync_init(mca_bcol_base_module_t *super)
{
    int status = mca_bcol_ptpcoll_barrier_setup(super, BCOL_SYNC);

    return status;
}
/**
 * Register the barrier functions of this component under the BCOL_BARRIER
 * collective type.
 *
 * @param super  module to register the functions on
 *
 * @return the value returned by mca_bcol_ptpcoll_barrier_setup().
 */
int bcol_ptpcoll_barrier_init(mca_bcol_base_module_t *super)
{
    int status = mca_bcol_ptpcoll_barrier_setup(super, BCOL_BARRIER);

    return status;
}