3a683419c5
This is a hot-fix patch for the issue reported by Ralph. In the future we plan to restructure the ml data structure layout. Tested by Nathan. cmr=v1.7.5:ticket=trac:4158 This commit was SVN r30619. The following Trac tickets were found above: Ticket 4158 --> https://svn.open-mpi.org/trac/ompi/ticket/4158
717 строки
26 KiB
C
717 строки
26 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved.
|
|
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
|
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
/**
|
|
* @file
|
|
*
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
#include <unistd.h>
|
|
#include <sys/types.h>
|
|
#include <sys/mman.h>
|
|
#include <fcntl.h>
|
|
#include <errno.h>
|
|
|
|
#include "ompi/constants.h"
|
|
#include "ompi/communicator/communicator.h"
|
|
#include "ompi/mca/bcol/bcol.h"
|
|
#include "opal/util/show_help.h"
|
|
#include "ompi/mca/bcol/base/base.h"
|
|
#include "ompi/mca/pml/pml.h" /* need this for the max tag size */
|
|
|
|
#include "bcol_ptpcoll.h"
|
|
#include "bcol_ptpcoll_utils.h"
|
|
#include "bcol_ptpcoll_bcast.h"
|
|
#include "bcol_ptpcoll_allreduce.h"
|
|
#include "bcol_ptpcoll_reduce.h"
|
|
|
|
#define BCOL_PTP_CACHE_LINE_SIZE 128
|
|
|
|
/*
|
|
* Local functions
|
|
*/
|
|
/**
 * Allocate the per-exchange offset table stored in allgather_offsets.
 *
 * One row of NOFFSETS ints is allocated for every exchange step of the
 * module's knomial_exchange_tree. When the tree has no exchanges nothing is
 * allocated and allgather_offsets is set to NULL.
 *
 * On an allocation failure every row obtained so far is released and
 * allgather_offsets is left NULL, so the module never holds a partially
 * built table that a later cleanup could trip over.
 *
 * @param ptpcoll_module  module whose allgather_offsets table is built
 * @return OMPI_SUCCESS, or OMPI_ERROR when an allocation fails
 */
static int alloc_allreduce_offsets_array(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
    int i;
    int n_exchanges = ptpcoll_module->knomial_exchange_tree.n_exchanges;
    int **offsets;

    /* Publish the table only once it is complete */
    ptpcoll_module->allgather_offsets = NULL;

    if (n_exchanges <= 0) {
        return OMPI_SUCCESS;
    }

    offsets = (int **) calloc(n_exchanges, sizeof(int *));
    if (NULL == offsets) {
        return OMPI_ERROR;
    }

    for (i = 0; i < n_exchanges; i++) {
        offsets[i] = (int *) malloc(sizeof(int) * NOFFSETS);
        if (NULL == offsets[i]) {
            /* roll back the rows allocated before the failure */
            while (--i >= 0) {
                free(offsets[i]);
            }
            free(offsets);
            return OMPI_ERROR;
        }
    }

    ptpcoll_module->allgather_offsets = offsets;
    return OMPI_SUCCESS;
}
|
|
|
|
/**
 * Release the offset table held in allgather_offsets.
 *
 * Frees one row per exchange step of knomial_exchange_tree, then the table
 * itself, and resets allgather_offsets to NULL so a repeated call (or a later
 * NULL check) does not touch freed memory. Does nothing when no table is
 * present.
 *
 * @param ptpcoll_module  module owning the table
 * @return always OMPI_SUCCESS
 */
static int free_allreduce_offsets_array(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
    int i;
    int n_exchanges = ptpcoll_module->knomial_exchange_tree.n_exchanges;

    if (NULL != ptpcoll_module->allgather_offsets) {
        for (i = 0; i < n_exchanges; i++) {
            free(ptpcoll_module->allgather_offsets[i]);
        }
        free(ptpcoll_module->allgather_offsets);
        ptpcoll_module->allgather_offsets = NULL;
    }

    return OMPI_SUCCESS;
}
|
|
|
|
static void
|
|
mca_bcol_ptpcoll_module_construct(mca_bcol_ptpcoll_module_t *ptpcoll_module)
|
|
{
|
|
uint64_t i;
|
|
/* Pointer to component */
|
|
ptpcoll_module->super.bcol_component = (mca_bcol_base_component_t *) &mca_bcol_ptpcoll_component;
|
|
ptpcoll_module->super.list_n_connected = NULL;
|
|
ptpcoll_module->super.hier_scather_offset = 0;
|
|
/* no header support in ptp */
|
|
ptpcoll_module->super.header_size = 0;
|
|
/* No network context */
|
|
ptpcoll_module->super.network_context = NULL;
|
|
/* set the upper limit on the tag */
|
|
i = 2;
|
|
ptpcoll_module->tag_mask = 1;
|
|
while ( i <= (uint64_t) mca_pml.pml_max_tag && i > 0) {
|
|
i <<= 1;
|
|
}
|
|
ptpcoll_module->ml_mem.ml_buf_desc = NULL;
|
|
ptpcoll_module->tag_mask = i - 1;
|
|
}
|
|
|
|
static void
|
|
mca_bcol_ptpcoll_module_destruct(mca_bcol_ptpcoll_module_t *ptpcoll_module)
|
|
{
|
|
int i;
|
|
mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem;
|
|
|
|
if (NULL != ml_mem->ml_buf_desc) {
|
|
/* Release the memory structs that were cache ML memory data */
|
|
uint32_t i, j, ci;
|
|
for (i = 0; i < ml_mem->num_banks; i++) {
|
|
for (j = 0; j < ml_mem->num_buffers_per_bank; j++) {
|
|
ci = i * ml_mem->num_buffers_per_bank + j;
|
|
if (NULL != ml_mem->ml_buf_desc[ci].requests) {
|
|
free(ml_mem->ml_buf_desc[ci].requests);
|
|
}
|
|
}
|
|
}
|
|
/* release the buffer descriptor */
|
|
free(ml_mem->ml_buf_desc);
|
|
}
|
|
|
|
if (NULL != ptpcoll_module->allgather_offsets) {
|
|
free_allreduce_offsets_array(ptpcoll_module);
|
|
}
|
|
|
|
if (NULL != ptpcoll_module->narray_node) {
|
|
for (i = 0; i < ptpcoll_module->group_size; i++) {
|
|
if (NULL != ptpcoll_module->narray_node[i].children_ranks) {
|
|
free(ptpcoll_module->narray_node[i].children_ranks);
|
|
}
|
|
}
|
|
|
|
free(ptpcoll_module->narray_node);
|
|
}
|
|
|
|
OBJ_DESTRUCT(&ptpcoll_module->collreqs_free);
|
|
|
|
if( NULL != ptpcoll_module->super.list_n_connected ){
|
|
free(ptpcoll_module->super.list_n_connected);
|
|
ptpcoll_module->super.list_n_connected = NULL;
|
|
}
|
|
|
|
|
|
}
|
|
|
|
/*
 * Class instance for the ptpcoll module: derives from
 * mca_bcol_base_module_t and uses the constructor/destructor defined above.
 */
OBJ_CLASS_INSTANCE(mca_bcol_ptpcoll_module_t, mca_bcol_base_module_t,
                   mca_bcol_ptpcoll_module_construct,
                   mca_bcol_ptpcoll_module_destruct);
|
|
|
|
/**
 * Build the local descriptor table for the ML payload buffers.
 *
 * Allocates num_banks * num_buffers_per_bank descriptors. For each one it
 * records the bank/buffer index, allocates a zeroed array of request
 * pointers, computes the address of the buffer's data area inside the ML
 * block and resets the reduce bookkeeping flags.
 *
 * @param desc                  out: receives the descriptor table; set to
 *                              NULL when the function fails
 * @param base_addr             start of the ML memory block
 * @param num_banks             number of banks in the block
 * @param num_buffers_per_bank  buffers in every bank
 * @param size_buffer           size in bytes of one buffer
 * @param header_size           bytes to skip at the start of each buffer
 *                              before the data area
 * @param group_size            unused in this function
 * @param pow_k                 k-nomial exponent; 0 is treated as 1
 * @return OMPI_SUCCESS, or OMPI_ERROR when an allocation fails (everything
 *         allocated so far is released)
 */
static int init_ml_buf_desc(mca_bcol_ptpcoll_ml_buffer_desc_t **desc, void *base_addr, uint32_t num_banks,
        uint32_t num_buffers_per_bank, uint32_t size_buffer, uint32_t header_size, int group_size, int pow_k)
{
    uint32_t i, j, ci;
    mca_bcol_ptpcoll_ml_buffer_desc_t *tmp_desc = NULL;
    int k_nomial_radix = mca_bcol_ptpcoll_component.k_nomial_radix;
    int pow_k_val = (0 == pow_k) ? 1 : pow_k;
    /* *2 is for gather session +1 for extra peer */
    int knomial_reqs = (k_nomial_radix - 1) * pow_k_val * 2 + 1;
    int narray_reqs = mca_bcol_ptpcoll_component.narray_radix * 2;
    /*
     * Size the request array for whichever count is larger. The previous
     * test compared against narray_radix but fell back to narray_radix * 2,
     * so it could pick a value below narray_radix * 2.
     * NOTE(review): assumes narray_radix * 2 is what the n-ary algorithms
     * need - confirm against their request usage.
     */
    int num_to_alloc = (knomial_reqs > narray_reqs) ? knomial_reqs : narray_reqs;

    (void) group_size;

    *desc = (mca_bcol_ptpcoll_ml_buffer_desc_t *)calloc((size_t) num_banks * num_buffers_per_bank,
            sizeof(mca_bcol_ptpcoll_ml_buffer_desc_t));
    if (NULL == *desc) {
        PTPCOLL_ERROR(("Failed to allocate memory"));
        return OMPI_ERROR;
    }

    tmp_desc = *desc;

    for (i = 0; i < num_banks; i++) {
        for (j = 0; j < num_buffers_per_bank; j++) {
            ci = i * num_buffers_per_bank + j;
            tmp_desc[ci].bank_index = i;
            tmp_desc[ci].buffer_index = j;
            tmp_desc[ci].requests = (ompi_request_t **)
                calloc(num_to_alloc, sizeof(ompi_request_t *));
            if (NULL == tmp_desc[ci].requests) {
                uint32_t done;

                PTPCOLL_ERROR(("Failed to allocate memory for requests"));
                /* undo the descriptors built so far and hand back NULL */
                for (done = 0; done < ci; done++) {
                    free(tmp_desc[done].requests);
                }
                free(tmp_desc);
                *desc = NULL;
                return OMPI_ERROR;
            }
            /*
             * ptpcoll doesn't have any header, but other bcols may have one,
             * so it has to be taken into account. The offset is computed in
             * size_t so that ci * size_buffer cannot wrap at 32 bits.
             */
            tmp_desc[ci].data_addr = (void *)
                ((unsigned char*)base_addr + (size_t) ci * size_buffer + header_size);
            PTPCOLL_VERBOSE(10, ("ml memory cache setup %d %d - %p", i, j, tmp_desc[ci].data_addr));

            /* init reduce implementation flags */
            tmp_desc[ci].reduce_init_called = false;
            tmp_desc[ci].reduction_status = 0;
        }
    }

    return OMPI_SUCCESS;
}
|
|
|
|
static void mca_bcol_ptpcoll_set_small_msg_thresholds(struct mca_bcol_base_module_t *super)
|
|
{
|
|
mca_bcol_ptpcoll_module_t *ptpcoll_module =
|
|
(mca_bcol_ptpcoll_module_t *) super;
|
|
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
|
|
|
|
/* Subtract out the maximum header size when calculating the thresholds. This
|
|
* will account for the headers used by the basesmuma component. If we do not
|
|
* take these headers into account we may overrun our buffer. */
|
|
|
|
/* Set the Allgather threshold equals to a ML buff size */
|
|
super->small_message_thresholds[BCOL_ALLGATHER] =
|
|
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) /
|
|
ompi_comm_size(ptpcoll_module->super.sbgp_partner_module->group_comm);
|
|
|
|
/* Set the Bcast threshold, all Bcast algths have the same threshold */
|
|
super->small_message_thresholds[BCOL_BCAST] =
|
|
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX);
|
|
|
|
/* Set the Alltoall threshold, the Ring algth sets some limitation */
|
|
super->small_message_thresholds[BCOL_ALLTOALL] =
|
|
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / 2;
|
|
|
|
/* Set the Allreduce threshold, NARRAY algth sets some limitation */
|
|
super->small_message_thresholds[BCOL_ALLREDUCE] =
|
|
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / ptpcoll_module->k_nomial_radix;
|
|
|
|
/* Set the Reduce threshold, NARRAY algth sets some limitation */
|
|
super->small_message_thresholds[BCOL_REDUCE] =
|
|
(ptpcoll_module->ml_mem.size_buffer - BCOL_HEADER_MAX) / cm->narray_radix;
|
|
}
|
|
|
|
/*
|
|
* Cache information about ML memory
|
|
*/
|
|
static int mca_bcol_ptpcoll_cache_ml_memory_info(struct mca_bcol_base_memory_block_desc_t *payload_block,
|
|
uint32_t data_offset,
|
|
struct mca_bcol_base_module_t *bcol,
|
|
void *reg_data)
|
|
{
|
|
mca_bcol_ptpcoll_module_t *ptpcoll_module = (mca_bcol_ptpcoll_module_t *) bcol;
|
|
mca_bcol_ptpcoll_local_mlmem_desc_t *ml_mem = &ptpcoll_module->ml_mem;
|
|
struct mca_bcol_base_memory_block_desc_t *desc = payload_block;
|
|
int group_size = ptpcoll_module->super.sbgp_partner_module->group_size;
|
|
|
|
PTPCOLL_VERBOSE(10, ("mca_bcol_ptpcoll_init_buffer_memory was called"));
|
|
|
|
/* cache ml mem desc tunings localy */
|
|
ml_mem->num_banks = desc->num_banks;
|
|
ml_mem->num_buffers_per_bank = desc->num_buffers_per_bank;
|
|
ml_mem->size_buffer = desc->size_buffer;
|
|
|
|
PTPCOLL_VERBOSE(10, ("ML buffer configuration num banks %d num_per_bank %d size %d base addr %p",
|
|
desc->num_banks, desc->num_buffers_per_bank, desc->size_buffer, desc->block->base_addr));
|
|
|
|
/* Set first bank index for release */
|
|
ml_mem->bank_index_for_release = 0;
|
|
|
|
if (OMPI_SUCCESS != init_ml_buf_desc(&ml_mem->ml_buf_desc,
|
|
desc->block->base_addr,
|
|
ml_mem->num_banks,
|
|
ml_mem->num_buffers_per_bank,
|
|
ml_mem->size_buffer,
|
|
data_offset,
|
|
group_size,
|
|
ptpcoll_module->pow_k)) {
|
|
PTPCOLL_VERBOSE(10, ("Failed to allocate rdma memory descriptor\n"));
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
PTPCOLL_VERBOSE(10, ("ptpcoll_module = %p, ml_mem_desc = %p.\n",
|
|
ptpcoll_module));
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*
|
|
* Load ptpcoll bcol functions
|
|
*/
|
|
static void load_func(mca_bcol_ptpcoll_module_t *ptpcoll_module)
|
|
{
|
|
int fnc;
|
|
|
|
/* reset everything to NULL */
|
|
for (fnc = 0; fnc < BCOL_NUM_OF_FUNCTIONS; fnc++) {
|
|
|
|
/*ptpcoll_module->super.bcol_function_table[fnc] = NULL;*/
|
|
ptpcoll_module->super.bcol_function_table[fnc] = NULL;
|
|
ptpcoll_module->super.bcol_function_init_table[fnc] = NULL;
|
|
}
|
|
|
|
ptpcoll_module->super.bcol_function_init_table[BCOL_BARRIER] = bcol_ptpcoll_barrier_init;
|
|
|
|
ptpcoll_module->super.bcol_function_init_table[BCOL_BCAST] = bcol_ptpcoll_bcast_init;
|
|
ptpcoll_module->super.bcol_function_init_table[BCOL_ALLREDUCE] = bcol_ptpcoll_allreduce_init;
|
|
ptpcoll_module->super.bcol_function_init_table[BCOL_ALLGATHER] = bcol_ptpcoll_allgather_init;
|
|
ptpcoll_module->super.bcol_function_table[BCOL_BCAST] = bcol_ptpcoll_bcast_k_nomial_anyroot;
|
|
ptpcoll_module->super.bcol_function_init_table[BCOL_ALLTOALL] = NULL;
|
|
ptpcoll_module->super.bcol_function_init_table[BCOL_SYNC] = mca_bcol_ptpcoll_memsync_init;
|
|
ptpcoll_module->super.bcol_function_init_table[BCOL_REDUCE] = bcol_ptpcoll_reduce_init;
|
|
|
|
/* ML memory cacher */
|
|
ptpcoll_module->super.bcol_memory_init = mca_bcol_ptpcoll_cache_ml_memory_info;
|
|
|
|
/* Set thresholds */
|
|
ptpcoll_module->super.set_small_msg_thresholds = mca_bcol_ptpcoll_set_small_msg_thresholds;
|
|
|
|
/* setup recursive k-ing tree */
|
|
ptpcoll_module->super.k_nomial_tree = mca_bcol_ptpcoll_setup_knomial_tree;
|
|
}
|
|
|
|
/**
 * Build the recursive k-nomial allgather tree node of a ptpcoll module.
 *
 * Forwards the sub-group size, this rank's index in the sub-group, the
 * component's k-nomial radix and list_n_connected to
 * netpatterns_setup_recursive_knomial_allgather_tree_node(), which fills in
 * knomial_allgather_tree.
 *
 * @param super  base-module view of a ptpcoll module
 * @return the return code of the netpatterns setup call
 */
int mca_bcol_ptpcoll_setup_knomial_tree(mca_bcol_base_module_t *super)
{
    mca_bcol_ptpcoll_module_t *p2p_module = (mca_bcol_ptpcoll_module_t *) super;

    return netpatterns_setup_recursive_knomial_allgather_tree_node(
            p2p_module->super.sbgp_partner_module->group_size,
            p2p_module->super.sbgp_partner_module->my_index,
            mca_bcol_ptpcoll_component.k_nomial_radix,
            super->list_n_connected,
            &p2p_module->knomial_allgather_tree);
}
|
|
|
|
/**
 * Compute the size of the largest complete radix-ary tree that fits in a
 * group.
 *
 * Levels of width 1, radix, radix^2, ... are accumulated until the running
 * node count reaches or exceeds group_size.
 *   - If the count lands exactly on group_size, the whole group is a
 *     complete tree: group_size is returned and *num_leafs is the width of
 *     the next (not yet added) level.
 *   - Otherwise the last level added does not fit: it is removed from the
 *     count, and *num_leafs is the width of that removed level.
 *
 * @param radix       fan-out of the tree
 * @param group_size  number of ranks in the group
 * @param num_leafs   out: level width as described above
 * @return number of ranks that form the complete tree
 */
static int calc_full_tree_size(int radix, int group_size, int *num_leafs)
{
    int width = 1;  /* width of the level that would be added next */
    int nodes = 0;  /* nodes accumulated over all levels added so far */
    int overflow_level;

    for (; nodes < group_size; width *= radix) {
        nodes += width;
    }

    if (nodes <= group_size) {
        *num_leafs = width;
        return group_size;
    }

    /* the last level added overshoots the group: take it back out */
    overflow_level = width / radix;
    *num_leafs = overflow_level;
    return nodes - overflow_level;
}
|
|
|
|
/* Setup N-array scatter Knomial-gather static information */
|
|
static int load_narray_knomial_tree (mca_bcol_ptpcoll_module_t *ptpcoll_module)
|
|
{
|
|
int rc, i, peer;
|
|
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
|
|
|
|
ptpcoll_module->full_narray_tree_size = calc_full_tree_size(
|
|
cm->narray_knomial_radix,
|
|
ptpcoll_module->group_size,
|
|
&ptpcoll_module->full_narray_tree_num_leafs);
|
|
|
|
ptpcoll_module->narray_knomial_proxy_extra_index = (int *)
|
|
malloc(sizeof(int) * (cm->narray_knomial_radix));
|
|
if (NULL == ptpcoll_module->narray_knomial_proxy_extra_index) {
|
|
PTPCOLL_ERROR(("Failed to allocate memory"));
|
|
goto Error;
|
|
}
|
|
|
|
ptpcoll_module->narray_knomial_node = calloc(
|
|
ptpcoll_module->full_narray_tree_size,
|
|
sizeof(netpatterns_narray_knomial_tree_node_t));
|
|
if(NULL == ptpcoll_module->narray_knomial_node) {
|
|
goto Error;
|
|
}
|
|
|
|
PTPCOLL_VERBOSE(10 ,("My type is proxy, full tree size = %d [%d]",
|
|
ptpcoll_module->full_narray_tree_size,
|
|
cm->narray_knomial_radix
|
|
));
|
|
|
|
if (ptpcoll_module->super.sbgp_partner_module->my_index <
|
|
ptpcoll_module->full_narray_tree_size) {
|
|
if (ptpcoll_module->super.sbgp_partner_module->my_index <
|
|
ptpcoll_module->group_size - ptpcoll_module->full_narray_tree_size) {
|
|
ptpcoll_module->narray_type = PTPCOLL_PROXY;
|
|
for (i = 0; i < cm->narray_knomial_radix; i++) {
|
|
peer =
|
|
ptpcoll_module->super.sbgp_partner_module->my_index *
|
|
cm->narray_knomial_radix + i +
|
|
ptpcoll_module->full_narray_tree_size;
|
|
if (peer >= ptpcoll_module->group_size) {
|
|
break;
|
|
}
|
|
ptpcoll_module->narray_knomial_proxy_extra_index[i] = peer;
|
|
}
|
|
ptpcoll_module->narray_knomial_proxy_num = i;
|
|
} else {
|
|
ptpcoll_module->narray_type = PTPCOLL_IN_GROUP;;
|
|
}
|
|
/* Setting node info */
|
|
for(i = 0; i < ptpcoll_module->full_narray_tree_size; i++) {
|
|
rc = netpatterns_setup_narray_knomial_tree(
|
|
cm->narray_knomial_radix,
|
|
i,
|
|
ptpcoll_module->full_narray_tree_size,
|
|
&ptpcoll_module->narray_knomial_node[i]);
|
|
if(OMPI_SUCCESS != rc) {
|
|
goto Error;
|
|
}
|
|
}
|
|
} else {
|
|
ptpcoll_module->narray_type = PTPCOLL_EXTRA;
|
|
ptpcoll_module->narray_knomial_proxy_extra_index[0] =
|
|
(ptpcoll_module->super.sbgp_partner_module->my_index -
|
|
ptpcoll_module->full_narray_tree_size) /
|
|
cm->narray_knomial_radix;
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
Error:
|
|
if (NULL != ptpcoll_module->narray_knomial_node) {
|
|
free(ptpcoll_module->narray_knomial_node);
|
|
}
|
|
if (NULL != ptpcoll_module->narray_knomial_proxy_extra_index) {
|
|
free(ptpcoll_module->narray_knomial_proxy_extra_index);
|
|
}
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
/* Setup N-array static information */
|
|
static int load_narray_tree(mca_bcol_ptpcoll_module_t *ptpcoll_module)
|
|
{
|
|
int rc, i;
|
|
mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;
|
|
|
|
ptpcoll_module->narray_node = calloc(ptpcoll_module->group_size,
|
|
sizeof(netpatterns_tree_node_t));
|
|
if(NULL == ptpcoll_module->narray_node ) {
|
|
goto Error;
|
|
}
|
|
|
|
for(i = 0; i < ptpcoll_module->group_size; i++) {
|
|
rc = netpatterns_setup_narray_tree(
|
|
cm->narray_radix,
|
|
i,
|
|
ptpcoll_module->group_size,
|
|
&ptpcoll_module->narray_node[i]);
|
|
if(OMPI_SUCCESS != rc) {
|
|
goto Error;
|
|
}
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
Error:
|
|
if (NULL != ptpcoll_module->narray_node) {
|
|
free(ptpcoll_module->narray_node);
|
|
}
|
|
return OMPI_ERROR;
|
|
}
|
|
|
|
/**
 * Compute the static k-nomial information of the module.
 *
 * Clamps the component's k_nomial_radix to the group size, derives
 * pow_k / pow_knum through pow_k_calc(), allocates kn_proxy_extra_index and
 * classifies this rank:
 *   - index >= pow_knum: PTPCOLL_KN_EXTRA; slot 0 holds its proxy.
 *   - index < pow_knum and at least one rank beyond pow_knum maps to it:
 *     PTPCOLL_KN_PROXY; the slots hold those extra ranks and
 *     kn_proxy_extra_num their count.
 *   - otherwise: PTPCOLL_KN_IN_GROUP.
 *
 * @param ptpcoll_module  module being initialised
 * @return OMPI_SUCCESS, or OMPI_ERROR when the index array cannot be
 *         allocated
 */
static int load_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
    int i;
    int n_slots;
    mca_bcol_ptpcoll_component_t *cm = &mca_bcol_ptpcoll_component;

    ptpcoll_module->k_nomial_radix =
        cm->k_nomial_radix > ptpcoll_module->group_size ?
        ptpcoll_module->group_size :
        cm->k_nomial_radix;

    ptpcoll_module->pow_k = pow_k_calc(ptpcoll_module->k_nomial_radix,
            ptpcoll_module->group_size,
            &ptpcoll_module->pow_knum);

    ptpcoll_module->kn_proxy_extra_num = 0;

    /*
     * Always reserve at least one slot: with a radix of 1 the original size
     * would be zero, and malloc(0) may legitimately return NULL, which would
     * be misreported as an out-of-memory failure.
     */
    n_slots = ptpcoll_module->k_nomial_radix - 1;
    if (n_slots < 1) {
        n_slots = 1;
    }
    ptpcoll_module->kn_proxy_extra_index = (int *) malloc(sizeof(int) * n_slots);
    if (NULL == ptpcoll_module->kn_proxy_extra_index) {
        PTPCOLL_ERROR(("Failed to allocate memory"));
        return OMPI_ERROR;
    }

    /* Setting peer type for K-nomial algorithm*/
    if (ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_knum ) {
        if (ptpcoll_module->super.sbgp_partner_module->my_index <
                ptpcoll_module->group_size - ptpcoll_module->pow_knum) {
            for (i = 0;
                    i < (ptpcoll_module->k_nomial_radix - 1) &&
                    ptpcoll_module->super.sbgp_partner_module->my_index *
                    (ptpcoll_module->k_nomial_radix - 1) +
                    i + ptpcoll_module->pow_knum < ptpcoll_module->group_size
                    ; i++) {
                ptpcoll_module->kn_proxy_extra_index[i] =
                    ptpcoll_module->super.sbgp_partner_module->my_index *
                    (ptpcoll_module->k_nomial_radix - 1) +
                    i + ptpcoll_module->pow_knum;
                PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_knum = %d [%d] my extra %d",
                            ptpcoll_module->pow_knum,
                            ptpcoll_module->pow_k,
                            ptpcoll_module->kn_proxy_extra_index[i]));
            }
            ptpcoll_module->kn_proxy_extra_num = i;
            /*
             * The loop can run zero times (for radix > 2 the extra ranks may
             * all map to lower-indexed proxies). Such a rank serves nobody,
             * so it is a plain in-group member; previously pow_ktype was
             * left unset in that case.
             */
            ptpcoll_module->pow_ktype =
                (i > 0) ? PTPCOLL_KN_PROXY : PTPCOLL_KN_IN_GROUP;
        } else {
            PTPCOLL_VERBOSE(10 ,("My type is in group, pow_knum = %d [%d]", ptpcoll_module->pow_knum,
                        ptpcoll_module->pow_k));
            ptpcoll_module->pow_ktype = PTPCOLL_KN_IN_GROUP;
        }
    } else {
        ptpcoll_module->pow_ktype = PTPCOLL_KN_EXTRA;
        ptpcoll_module->kn_proxy_extra_index[0] = (ptpcoll_module->super.sbgp_partner_module->my_index -
                ptpcoll_module->pow_knum) / (ptpcoll_module->k_nomial_radix - 1);
        PTPCOLL_VERBOSE(10 ,("My type is extra , pow_knum = %d [%d] my proxy %d",
                    ptpcoll_module->pow_knum,
                    ptpcoll_module->pow_k,
                    ptpcoll_module->kn_proxy_extra_index[0]));
    }

    return OMPI_SUCCESS;
}
|
|
|
|
/**
 * Compute the static binomial (radix 2) information of the module.
 *
 * pow_2 / pow_2num are obtained from pow_k_calc() with radix 2. The rank is
 * then classified:
 *   - index >= pow_2num: PTPCOLL_EXTRA, paired with rank index - pow_2num.
 *   - index < group_size - pow_2num: PTPCOLL_PROXY, paired with rank
 *     index + pow_2num.
 *   - otherwise: PTPCOLL_IN_GROUP (proxy_extra_index is not written).
 *
 * @param ptpcoll_module  module being initialised
 * @return always OMPI_SUCCESS
 */
static int load_binomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
    ptpcoll_module->pow_2 = pow_k_calc(2,
            ptpcoll_module->group_size,
            &ptpcoll_module->pow_2num);

    assert(ptpcoll_module->pow_2num == 1 << ptpcoll_module->pow_2);
    assert(ptpcoll_module->pow_2num <= ptpcoll_module->group_size);

    /* Ranks beyond the power-of-two core are "extra" */
    if (!(ptpcoll_module->super.sbgp_partner_module->my_index < ptpcoll_module->pow_2num)) {
        PTPCOLL_VERBOSE(10 ,("My type is extra , pow_2num = %d [%d]", ptpcoll_module->pow_2num,
                    ptpcoll_module->pow_2));
        ptpcoll_module->pow_2type = PTPCOLL_EXTRA;
        ptpcoll_module->proxy_extra_index = ptpcoll_module->super.sbgp_partner_module->my_index -
            ptpcoll_module->pow_2num;
        return OMPI_SUCCESS;
    }

    /* Core ranks: the lowest ones each act as proxy for one extra rank */
    if (ptpcoll_module->super.sbgp_partner_module->my_index <
            ptpcoll_module->group_size - ptpcoll_module->pow_2num) {
        PTPCOLL_VERBOSE(10 ,("My type is proxy, pow_2num = %d [%d]", ptpcoll_module->pow_2num,
                    ptpcoll_module->pow_2));
        ptpcoll_module->pow_2type = PTPCOLL_PROXY;
        ptpcoll_module->proxy_extra_index = ptpcoll_module->super.sbgp_partner_module->my_index +
            ptpcoll_module->pow_2num;
    } else {
        PTPCOLL_VERBOSE(10 ,("My type is in group, pow_2num = %d [%d]", ptpcoll_module->pow_2num,
                    ptpcoll_module->pow_2));
        ptpcoll_module->pow_2type = PTPCOLL_IN_GROUP;
    }

    return OMPI_SUCCESS;
}
|
|
|
|
/**
 * Build the recursive k-nomial exchange tree node of the module.
 *
 * Passes the group size, this rank's sub-group index and the component's
 * k_nomial_radix to netpatterns_setup_recursive_knomial_tree_node(), which
 * fills in knomial_exchange_tree.
 *
 * @param ptpcoll_module  module being initialised
 * @return the return code of the netpatterns setup call
 */
static int load_recursive_knomial_info(mca_bcol_ptpcoll_module_t *ptpcoll_module)
{
    return netpatterns_setup_recursive_knomial_tree_node(
            ptpcoll_module->group_size,
            ptpcoll_module->super.sbgp_partner_module->my_index,
            mca_bcol_ptpcoll_component.k_nomial_radix,
            &ptpcoll_module->knomial_exchange_tree);
}
|
|
|
|
/**
 * Free-list item initialiser for collective requests.
 *
 * Allocates the zeroed request-pointer array of a collreq according to the
 * component's barrier_alg:
 *   - 1: two request slots
 *   - 2: 2 * k_nomial_radix request slots (radix taken from the module)
 * For any other value no array is allocated and requests is set to NULL, so
 * the field never holds an indeterminate pointer.
 *
 * The function cannot report failure; when calloc fails requests is NULL.
 *
 * @param item  free-list item, viewed as mca_bcol_ptpcoll_collreq_t
 * @param ctx   the owning mca_bcol_ptpcoll_module_t
 */
static void bcol_ptpcoll_collreq_init(ompi_free_list_item_t *item, void* ctx)
{
    mca_bcol_ptpcoll_module_t *ptpcoll_module= (mca_bcol_ptpcoll_module_t *) ctx;
    mca_bcol_ptpcoll_collreq_t *collreq = (mca_bcol_ptpcoll_collreq_t *) item;

    switch(mca_bcol_ptpcoll_component.barrier_alg) {
        case 1:
            collreq->requests = (ompi_request_t **)
                calloc(2, sizeof(ompi_request_t *));
            break;
        case 2:
            collreq->requests = (ompi_request_t **)
                calloc(2 * ptpcoll_module->k_nomial_radix, sizeof(ompi_request_t *));
            break;
        default:
            /* unknown algorithm: leave a well-defined empty array pointer */
            collreq->requests = NULL;
            break;
    }
}
|
|
|
|
/* query to see if the module is available for use on the given
|
|
* communicator, and if so, what it's priority is. This is where
|
|
* the backing shared-memory file is created.
|
|
*/
|
|
mca_bcol_base_module_t **mca_bcol_ptpcoll_comm_query(mca_sbgp_base_module_t *sbgp,
|
|
int *num_modules)
|
|
{
|
|
int rc;
|
|
/* local variables */
|
|
struct ompi_communicator_t *comm = sbgp->group_comm;
|
|
mca_bcol_ptpcoll_module_t *ptpcoll_module = NULL;
|
|
mca_bcol_base_module_t **ptpcoll_modules = NULL;
|
|
int iovec_size;
|
|
|
|
/* initialize local variables */
|
|
*num_modules = 0;
|
|
|
|
/*
|
|
* This is activated only for intra-communicators
|
|
*/
|
|
if (OMPI_COMM_IS_INTER(comm) ) {
|
|
return NULL;
|
|
}
|
|
|
|
/* allocate and initialize an sm-v2 module */
|
|
ptpcoll_modules = (mca_bcol_base_module_t **) malloc(sizeof(mca_bcol_base_module_t *));
|
|
if (NULL == ptpcoll_modules) {
|
|
return NULL;
|
|
}
|
|
|
|
ptpcoll_module = OBJ_NEW(mca_bcol_ptpcoll_module_t);
|
|
if (NULL == ptpcoll_module) {
|
|
return NULL;
|
|
}
|
|
|
|
/* On this stage we support only one single module */
|
|
ptpcoll_modules[*num_modules] = &(ptpcoll_module->super);
|
|
|
|
(*num_modules)++;
|
|
/* set the subgroup */
|
|
ptpcoll_module->super.sbgp_partner_module = sbgp;
|
|
/* caching some useful information */
|
|
ptpcoll_module->group_size =
|
|
ptpcoll_module->super.sbgp_partner_module->group_size;
|
|
|
|
rc = load_binomial_info(ptpcoll_module);
|
|
if (OMPI_SUCCESS != rc) {
|
|
PTPCOLL_VERBOSE(10, ("Failed to load knomial info"));
|
|
goto CLEANUP;
|
|
}
|
|
|
|
rc = load_knomial_info(ptpcoll_module);
|
|
if (OMPI_SUCCESS != rc) {
|
|
PTPCOLL_VERBOSE(10, ("Failed to load knomial info"));
|
|
goto CLEANUP;
|
|
}
|
|
|
|
rc = load_narray_tree(ptpcoll_module);
|
|
if (OMPI_SUCCESS != rc) {
|
|
PTPCOLL_VERBOSE(10, ("Failed to load narray tree"));
|
|
goto CLEANUP;
|
|
}
|
|
|
|
rc = load_narray_knomial_tree(ptpcoll_module);
|
|
if (OMPI_SUCCESS != rc) {
|
|
PTPCOLL_VERBOSE(10, ("Failed to load narray-knomila tree"));
|
|
goto CLEANUP;
|
|
}
|
|
|
|
rc = load_recursive_knomial_info(ptpcoll_module);
|
|
if (OMPI_SUCCESS != rc) {
|
|
PTPCOLL_VERBOSE(10, ("Failed to load recursive knomial tree"));
|
|
goto CLEANUP;
|
|
}
|
|
|
|
/* creating collfrag free list */
|
|
OBJ_CONSTRUCT(&ptpcoll_module->collreqs_free, ompi_free_list_t);
|
|
rc = ompi_free_list_init_ex_new(&ptpcoll_module->collreqs_free,
|
|
sizeof(mca_bcol_ptpcoll_collreq_t),
|
|
BCOL_PTP_CACHE_LINE_SIZE,
|
|
OBJ_CLASS(mca_bcol_ptpcoll_collreq_t),
|
|
0, BCOL_PTP_CACHE_LINE_SIZE,
|
|
256 /* free_list_num */,
|
|
-1 /* free_list_max, -1 = infinite */,
|
|
32 /* free_list_inc */,
|
|
NULL,
|
|
bcol_ptpcoll_collreq_init,
|
|
ptpcoll_module);
|
|
if (OMPI_SUCCESS != rc) {
|
|
goto CLEANUP;
|
|
}
|
|
|
|
load_func(ptpcoll_module);
|
|
|
|
rc = alloc_allreduce_offsets_array(ptpcoll_module);
|
|
if (OMPI_SUCCESS != rc) {
|
|
goto CLEANUP;
|
|
}
|
|
|
|
/* Allocating iovec for PTP alltoall */
|
|
iovec_size = ptpcoll_module->group_size / 2 + ptpcoll_module->group_size % 2;
|
|
ptpcoll_module->alltoall_iovec = (struct iovec *) malloc(sizeof(struct iovec)
|
|
* iovec_size);
|
|
ptpcoll_module->log_group_size = lognum(ptpcoll_module->group_size);
|
|
|
|
rc = mca_bcol_base_bcol_fns_table_init(&(ptpcoll_module->super));
|
|
if (OMPI_SUCCESS != rc) {
|
|
goto CLEANUP;
|
|
}
|
|
|
|
/* Zero copy is supported */
|
|
ptpcoll_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY;
|
|
|
|
/* return */
|
|
return ptpcoll_modules;
|
|
|
|
CLEANUP:
|
|
|
|
OBJ_RELEASE(ptpcoll_module);
|
|
return NULL;
|
|
}
|