2014-01-22 19:39:19 +04:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
2012-08-16 23:11:35 +04:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
|
|
|
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
2014-01-22 19:39:19 +04:00
|
|
|
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
|
|
|
* reserved.
|
2012-08-16 23:11:35 +04:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "ompi/constants.h"
|
|
|
|
#include "ompi/communicator/communicator.h"
|
|
|
|
#include "ompi/mca/bcol/bcol.h"
|
|
|
|
#include "ompi/mca/bcol/base/base.h"
|
2013-02-06 01:52:55 +04:00
|
|
|
#include "ompi/patterns/net/netpatterns.h"
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
#include "opal/util/show_help.h"
|
|
|
|
#include "opal/align.h"
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
#include "ompi/mca/bcol/basesmuma/bcol_basesmuma_reduce.h"
|
2012-08-16 23:11:35 +04:00
|
|
|
#include "bcol_basesmuma.h"
|
|
|
|
#include "bcol_basesmuma_utils.h"
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
#ifdef __PORTALS_AVAIL__
|
|
|
|
#include "bcol_basesmuma_portals.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
|
|
|
/* debug */
|
|
|
|
static mca_bcol_basesmuma_module_t *test_sm_module;
|
|
|
|
|
|
|
|
int test_addressing(int seq_num,
|
|
|
|
mca_bcol_base_module_t *b_module)
|
|
|
|
{
|
|
|
|
|
|
|
|
/* local variables */
|
|
|
|
int ret=OMPI_SUCCESS, idx, leading_dim, loop_cnt, exchange, flag_to_set, i;
|
|
|
|
int pair_rank, flag_offset;
|
|
|
|
mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
|
|
|
|
netpatterns_pair_exchange_node_t *my_exchange_node;
|
|
|
|
int extra_rank, my_rank, pow_2;
|
|
|
|
mca_bcol_basesmuma_ctl_struct_t volatile *partner_ctl;
|
|
|
|
mca_bcol_basesmuma_ctl_struct_t volatile *my_ctl;
|
|
|
|
int64_t sequence_number,seq_actual,seq_exp,flag_actual,flag_exp ;
|
|
|
|
bool found;
|
|
|
|
int buff_index, first_instance=0;
|
|
|
|
mca_bcol_basesmuma_module_t *bcol_module=
|
|
|
|
(mca_bcol_basesmuma_module_t *)b_module;
|
|
|
|
|
|
|
|
/* get the pointer to the segment of control structures */
|
|
|
|
my_exchange_node=&(bcol_module->recursive_doubling_tree);
|
|
|
|
my_rank=bcol_module->super.sbgp_partner_module->my_index;
|
|
|
|
pow_2=bcol_module->super.sbgp_partner_module->pow_2;
|
|
|
|
|
|
|
|
/* figure out what instance of the basesmuma bcol I am */
|
|
|
|
leading_dim=bcol_module->colls_no_user_data.size_of_group;
|
|
|
|
sequence_number=seq_num;
|
|
|
|
buff_index=sequence_number & (bcol_module->colls_no_user_data.mask);
|
|
|
|
idx=SM_ARRAY_INDEX(leading_dim,buff_index,0);
|
|
|
|
ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **)
|
|
|
|
bcol_module->colls_no_user_data.ctl_buffs+idx;
|
|
|
|
my_ctl=ctl_structs[my_rank];
|
|
|
|
for(i=0 ; i < leading_dim ; i++ ) {
|
|
|
|
partner_ctl=ctl_structs[i];
|
|
|
|
seq_actual=partner_ctl->sequence_number;
|
|
|
|
flag_actual=partner_ctl->flag;
|
|
|
|
seq_exp=i;
|
|
|
|
flag_exp=sequence_number;
|
|
|
|
if( seq_actual != seq_exp ) {
|
|
|
|
fprintf(stderr," Error sn expected %ld got %ld \n",
|
|
|
|
seq_exp,seq_actual);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
if( flag_actual != flag_exp ) {
|
|
|
|
fprintf(stderr," Error flag expected %ld got %ld \n",
|
|
|
|
flag_exp,flag_actual);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
static void test_resrouce_recycle()
|
|
|
|
{
|
|
|
|
int i,j, grp_size, my_rank;
|
|
|
|
int idx,sum;
|
|
|
|
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
|
|
|
|
int num_bufs= cs->basesmuma_num_mem_banks*
|
|
|
|
cs->basesmuma_num_regions_per_bank;
|
|
|
|
int64_t sn=100;
|
|
|
|
bcol_function_args_t fun_args;
|
|
|
|
mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
|
|
|
|
|
|
|
|
fprintf(stderr," num buffs %d \n",num_bufs);
|
|
|
|
fflush(stderr);
|
|
|
|
|
|
|
|
sm_nbbar_desc_t *sm_desc=
|
|
|
|
&((test_sm_module->colls_no_user_data.ctl_buffs_mgmt[0].nb_barrier_desc));
|
|
|
|
|
|
|
|
grp_size=sm_desc->sm_module->super.sbgp_partner_module->group_size;
|
|
|
|
my_rank=sm_desc->sm_module->super.sbgp_partner_module->my_index;
|
|
|
|
|
|
|
|
/* need to hack the sequence number offset, as this is set after this
|
|
|
|
* routine is called */
|
|
|
|
test_sm_module->super.squence_number_offset=100;
|
|
|
|
|
|
|
|
for( i=0 ; i < num_bufs +2 ; i++ ) {
|
|
|
|
/* get my ctl structure index in this buffer set */
|
|
|
|
idx=SM_ARRAY_INDEX(grp_size,i,0);
|
|
|
|
ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **)
|
|
|
|
test_sm_module->colls_no_user_data.ctl_buffs+idx;
|
|
|
|
ctl_structs[my_rank]->sequence_number=(int64_t)my_rank;
|
|
|
|
ctl_structs[my_rank]->flag=(int64_t)i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mark that I am here */
|
|
|
|
idx=SM_ARRAY_INDEX(grp_size,(num_bufs+1),0);
|
|
|
|
ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **)
|
|
|
|
test_sm_module->colls_no_user_data.ctl_buffs+idx;
|
|
|
|
ctl_structs[my_rank]->index=(int64_t)1;
|
|
|
|
|
|
|
|
/* make sure all have arrived */
|
|
|
|
|
|
|
|
sum=0;
|
|
|
|
while(sum < grp_size ) {
|
|
|
|
sum=0;
|
|
|
|
idx=SM_ARRAY_INDEX(grp_size,(num_bufs+1),0);
|
|
|
|
ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **)
|
|
|
|
test_sm_module->colls_no_user_data.ctl_buffs+idx;
|
|
|
|
for( i=0 ; i < grp_size ; i++ ) {
|
|
|
|
sum+=ctl_structs[i]->index;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check to see what values we see */
|
|
|
|
for( i=0 ; i < num_bufs +2 ; i++ ) {
|
|
|
|
idx=SM_ARRAY_INDEX(grp_size,i,0);
|
|
|
|
ctl_structs=(mca_bcol_basesmuma_ctl_struct_t **)
|
|
|
|
test_sm_module->colls_no_user_data.ctl_buffs+idx;
|
|
|
|
for( j=0 ; j < grp_size ; j++ ) {
|
|
|
|
if( ctl_structs[j]->sequence_number != j ) {
|
|
|
|
fprintf(stderr," Error sn %ld expected %ld i %d j %d my %d \n",
|
|
|
|
ctl_structs[i]->sequence_number,j, i,j,my_rank);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
if( ctl_structs[j]->flag != i ) {
|
|
|
|
fprintf(stderr," Error sn %ld expected %ld \n",
|
|
|
|
ctl_structs[i]->flag,i);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for( i=0 ; i < num_bufs ; i++ ) {
|
|
|
|
test_addressing((int64_t)i,test_sm_module);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
fun_args.n_of_this_type_in_collective=2;
|
|
|
|
for( i=0 ; i < 2*grp_size ; i++ ) {
|
|
|
|
|
|
|
|
if( my_rank == (i%grp_size) ) {
|
|
|
|
sleep(3);
|
|
|
|
}
|
|
|
|
|
|
|
|
fun_args.sequence_num=sn;
|
|
|
|
sn++;
|
|
|
|
bcol_basesmuma_fanin(&fun_args,
|
|
|
|
(mca_bcol_base_module_t *)test_sm_module);
|
|
|
|
bcol_basesmuma_fanout(&fun_args,
|
|
|
|
(mca_bcol_base_module_t *)test_sm_module);
|
|
|
|
|
|
|
|
if( my_rank == (i%grp_size) ) {
|
|
|
|
fprintf(stderr," hi from %d \n",my_rank);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
for( i=0 ; i < 2*grp_size ; i++ ) {
|
|
|
|
|
|
|
|
if( my_rank == (i%grp_size) ) {
|
|
|
|
sleep(3);
|
|
|
|
}
|
|
|
|
bcol_basesmuma_rd_nb_barrier_init_admin(sm_desc);
|
|
|
|
while( sm_desc->collective_phase != NB_BARRIER_DONE ) {
|
|
|
|
bcol_basesmuma_rd_nb_barrier_progress_admin(sm_desc);
|
|
|
|
}
|
|
|
|
sm_desc->sm_module->colls_no_user_data.ctl_buffs_mgmt[0].bank_gen_counter++;
|
|
|
|
if( my_rank == (i%grp_size) ) {
|
|
|
|
fprintf(stderr," hi from %d \n",my_rank);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
for( i = 0 ; i < num_bufs+1 ; i++ ) {
|
|
|
|
indx=bcol_basesmuma_get_buff_index(&(test_sm_module->colls_no_user_data),
|
|
|
|
(uint64_t)i);
|
|
|
|
fprintf(stderr," indx %d \n ",indx);
|
|
|
|
fflush(stderr);
|
|
|
|
opal_progress();
|
|
|
|
if( -1 != indx ) {
|
|
|
|
ret=bcol_basesmuma_free_buff(&(test_sm_module->colls_no_user_data),
|
|
|
|
(uint64_t)i);
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
fprintf(stderr," Error when returning %d \n",i);
|
|
|
|
fflush(stderr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
}
|
|
|
|
/* end debug */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if 0 /* this routine (with appropriate bug fixes) is correct when
|
|
|
|
* the subgroup is a single socket. However, it is not correct
|
|
|
|
* if used over several sockets - the data is correct, but determining
|
|
|
|
* the tree radix based on that is not.
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* Local function
|
|
|
|
* Get the maximal number of processors sharing the
|
|
|
|
* same socket within a sub-group.
|
|
|
|
* @param my_rank - the rank of the executing process.
|
|
|
|
* @param group_comm - pointer to the sbgp's communicator.
|
|
|
|
* @return - maximal number of procs sharing a socket.
|
|
|
|
*/
|
|
|
|
static int get_num_of_ranks_sharing_socket(int my_rank, struct ompi_communicator_t *group_comm)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
int rc;
|
|
|
|
int ret;
|
|
|
|
int proc;
|
|
|
|
int local;
|
|
|
|
bool bound;
|
|
|
|
int cnt = 0;
|
|
|
|
int socket_tmp;
|
|
|
|
int group_size;
|
|
|
|
|
|
|
|
int32_t pcnt = 1;
|
|
|
|
int num_sockets;
|
|
|
|
opal_list_t peers;
|
|
|
|
int n_local_peers;
|
|
|
|
int num_processors;
|
|
|
|
int core_index = -1;
|
|
|
|
int my_socket_index = -1;
|
|
|
|
int* num_proc_per_socket;
|
|
|
|
|
|
|
|
orte_namelist_t *peer;
|
|
|
|
struct ompi_proc_t** procs = NULL;
|
|
|
|
opal_paffinity_base_cpu_set_t my_cpu_set;
|
|
|
|
opal_buffer_t* sbuffer = OBJ_NEW(opal_buffer_t);
|
|
|
|
opal_buffer_t* rbuffer = OBJ_NEW(opal_buffer_t);
|
|
|
|
|
|
|
|
|
|
|
|
/* Set proc info */
|
|
|
|
procs = ompi_comm_local_group_procs(group_comm);
|
|
|
|
group_size = ompi_comm_size(group_comm);
|
|
|
|
|
|
|
|
/* Get the number of processors on this node */
|
|
|
|
ret = opal_paffinity_base_get_processor_info(&num_processors);
|
|
|
|
if((OMPI_SUCCESS != ret) || (OMPI_SUCCESS != rc)){
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Get process affinity mask */
|
|
|
|
OPAL_PAFFINITY_CPU_ZERO(my_cpu_set);
|
|
|
|
ret = opal_paffinity_base_get(&my_cpu_set);
|
|
|
|
OPAL_PAFFINITY_PROCESS_IS_BOUND(my_cpu_set, &bound);
|
|
|
|
if(!bound) {
|
|
|
|
return 0;
|
|
|
|
}else{
|
|
|
|
/* figure out how many sockets are in this system */
|
|
|
|
rc = opal_paffinity_base_get_socket_info(&num_sockets);
|
|
|
|
if((OMPI_SUCCESS != ret) || (OMPI_SUCCESS != rc)){
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* No sockets*/
|
|
|
|
if(num_sockets == 0){
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* All subgroup members share the same socket on the node */
|
|
|
|
if(num_sockets == 1){
|
|
|
|
return group_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allocate a buffer that counts the number of sockets on a node */
|
|
|
|
num_proc_per_socket = (int*)calloc(num_sockets, sizeof(int));
|
|
|
|
if(NULL == num_proc_per_socket){
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* Loop over processors on this node */
|
|
|
|
/* for (proc = 0 ; proc < num_processors ; proc++) {
|
|
|
|
if (OPAL_PAFFINITY_CPU_ISSET(proc, my_cpu_set)) {
|
|
|
|
ret = opal_paffinity_base_get_map_to_socket_core(proc,
|
|
|
|
&socket_tmp, &core_index);
|
|
|
|
|
|
|
|
if(my_socket_index != socket_tmp) {
|
|
|
|
my_socket_index = socket_tmp;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}*/
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The subgroup is divided according to node hierarchy,
|
|
|
|
* therefore several sockets might be shared by the groups procs.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Construct peers list */
|
|
|
|
n_local_peers = 0;
|
|
|
|
OBJ_CONSTRUCT(&peers, opal_list_t);
|
|
|
|
for(proc = 0; proc < group_size; proc++){
|
|
|
|
local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
|
|
|
|
if (local) {
|
|
|
|
peer = OBJ_NEW(orte_namelist_t);
|
|
|
|
peer->name.jobid = group_comm->c_local_group->grp_proc_pointers[proc]->proc_name.jobid;
|
|
|
|
peer->name.vpid = group_comm->c_local_group->grp_proc_pointers[proc]->proc_name.vpid;
|
|
|
|
opal_list_append(&peers, &peer->item);
|
|
|
|
n_local_peers++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*Pack socket index */
|
|
|
|
ret = opal_dss.pack(sbuffer, &my_socket_index, 1, OPAL_INT32);
|
|
|
|
if (ORTE_SUCCESS != ret){
|
|
|
|
fprintf(stderr, " pack returned error %d for my_socket_index \n",ret);
|
|
|
|
fflush(stderr);
|
|
|
|
return -1; /*ret;*/
|
|
|
|
}
|
|
|
|
|
|
|
|
/*Allgather data over the communicator */
|
|
|
|
if (ORTE_SUCCESS != (ret = orte_grpcomm.allgather_list(&peers, sbuffer, rbuffer))) {
|
|
|
|
fprintf(stderr," orte_grpcomm.allgather_list returned error %d \n", ret);
|
|
|
|
fflush(stderr);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Is every socket being shared by the same number of procs?
|
|
|
|
* If not we should go over each proc index and find maximal socket */
|
|
|
|
|
|
|
|
/* TODO: Need to find maximal number of procs
|
|
|
|
* sharing the same socket on the node in case
|
|
|
|
* there is more than one socket on the node
|
|
|
|
* and that the sbgp type is according to node
|
|
|
|
* and not socket */
|
|
|
|
/*Unpack the data and find fellow socketeers*/
|
|
|
|
cnt = 0;
|
|
|
|
for (proc = 0; proc < n_local_peers; proc++) {
|
|
|
|
|
|
|
|
int rem_socket_index;
|
|
|
|
|
|
|
|
/*Unpack socket_index*/
|
|
|
|
ret = opal_dss.unpack(rbuffer, &rem_socket_index, &pcnt, OPAL_INT32);
|
|
|
|
if (ORTE_SUCCESS != ret) {
|
|
|
|
fprintf(stderr," Unpack returned error %d for rem_socket_index\n", ret);
|
|
|
|
fflush(stderr);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*Populate the list*/
|
|
|
|
/* if (rem_socket_index == my_socket_index) {
|
|
|
|
cnt++;
|
|
|
|
}*/
|
|
|
|
num_proc_per_socket[rem_socket_index]++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Find the maximal number of procs sharing the same socket */
|
|
|
|
for(i = 0; i < num_sockets; i++){
|
|
|
|
if(cnt < num_proc_per_socket[i]){
|
|
|
|
cnt = num_proc_per_socket[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*Free resources*/
|
|
|
|
OBJ_DESTRUCT(&peers);
|
|
|
|
OBJ_RELEASE(sbuffer);
|
|
|
|
OBJ_RELEASE(rbuffer);
|
|
|
|
free(num_proc_per_socket);
|
|
|
|
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
/*
|
|
|
|
* Local functions
|
|
|
|
*/
|
|
|
|
static int basesmuma_module_enable(mca_bcol_base_module_t *module,
|
|
|
|
struct ompi_communicator_t *comm);
|
|
|
|
#endif
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/*
|
|
|
|
* Local functions
|
|
|
|
*/
|
|
|
|
/**
 * Allocate the per-exchange offset table used by the large-message reduce.
 *
 * One row of NOFFSETS ints is allocated for every exchange step of the
 * module's k-nomial exchange tree (knomial_exchange_tree.n_exchanges).
 * Nothing is allocated, and reduce_offsets is left untouched, when the
 * tree has no exchange steps.
 *
 * If any allocation fails, every row obtained so far and the row-pointer
 * array itself are released and sm_module->reduce_offsets is reset to
 * NULL.  free_lmsg_reduce_offsets_array() frees all n_exchanges rows
 * unconditionally, so it must never be handed a partially filled table.
 *
 * @param sm_module  module whose reduce_offsets table is set up
 * @return OMPI_SUCCESS on success or when there is nothing to allocate,
 *         OMPI_ERROR when a memory allocation failed
 */
static int alloc_lmsg_reduce_offsets_array(mca_bcol_basesmuma_module_t *sm_module)
{
    int i, j;
    netpatterns_k_exchange_node_t *k_node = &sm_module->knomial_exchange_tree;
    int n_exchanges = k_node->n_exchanges;

    /* no exchange steps: no offsets to precalculate */
    if (0 >= n_exchanges) {
        return OMPI_SUCCESS;
    }

    sm_module->reduce_offsets = (int **) malloc((size_t) n_exchanges * sizeof(int *));
    if (NULL == sm_module->reduce_offsets) {
        return OMPI_ERROR;
    }

    for (i = 0; i < n_exchanges; i++) {
        sm_module->reduce_offsets[i] = (int *) malloc(sizeof(int) * NOFFSETS);

        if (NULL == sm_module->reduce_offsets[i]) {
            /* roll back: rows [0, i) are valid, rows [i, n) were never set */
            for (j = 0; j < i; j++) {
                free(sm_module->reduce_offsets[j]);
            }
            free(sm_module->reduce_offsets);
            sm_module->reduce_offsets = NULL;
            return OMPI_ERROR;
        }
    }

    return OMPI_SUCCESS;
}
|
|
|
|
|
|
|
|
/**
 * Release the offset table used by the large-message reduce.
 *
 * Frees each of the knomial_exchange_tree.n_exchanges rows and then the
 * row-pointer array.  The reduce_offsets pointer is reset to NULL
 * afterwards so that calling this routine again is a harmless no-op
 * instead of a double free.
 *
 * @param sm_module  module whose reduce_offsets table is released
 * @return always OMPI_SUCCESS
 */
static int free_lmsg_reduce_offsets_array(mca_bcol_basesmuma_module_t *sm_module)
{
    int i;
    netpatterns_k_exchange_node_t *k_node = &sm_module->knomial_exchange_tree;
    int n_exchanges = k_node->n_exchanges;

    /* nothing was allocated (or it has already been released) */
    if (NULL == sm_module->reduce_offsets) {
        return OMPI_SUCCESS;
    }

    for (i = 0; i < n_exchanges; i++) {
        free(sm_module->reduce_offsets[i]);
    }

    free(sm_module->reduce_offsets);
    sm_module->reduce_offsets = NULL;

    return OMPI_SUCCESS;
}
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
static void
|
2012-08-16 23:11:35 +04:00
|
|
|
mca_bcol_basesmuma_module_construct(mca_bcol_basesmuma_module_t *module)
|
|
|
|
{
|
|
|
|
module->super.bcol_component = (mca_bcol_base_component_t *) &mca_bcol_basesmuma_component;
|
|
|
|
module->super.list_n_connected = NULL;
|
|
|
|
module->super.hier_scather_offset = 0;
|
2014-01-22 19:39:19 +04:00
|
|
|
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
static void
|
2012-08-16 23:11:35 +04:00
|
|
|
mca_bcol_basesmuma_module_destruct(mca_bcol_basesmuma_module_t *sm_module)
|
|
|
|
{
|
|
|
|
/* local variables */
|
|
|
|
int i;
|
|
|
|
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/*
|
|
|
|
* release allocated resrouces
|
2012-08-16 23:11:35 +04:00
|
|
|
*/
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/* ...but not until you're sure you have no outstanding collectives */
|
|
|
|
while(0 != opal_list_get_size(&(cs->nb_admin_barriers))) {
|
|
|
|
opal_progress();
|
|
|
|
}
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
#ifdef __PORTALS_AVAIL__
|
|
|
|
/* Remove portals bcast specific resources */
|
|
|
|
if ( PTL_OK != PtlEQFree(sm_module->sg_state.read_eq)) {
|
|
|
|
BASESMUMA_VERBOSE(10,("PtlEQFree() failed: )"));
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* Remove Lmsg Reduce Offsets Array */
|
|
|
|
free_lmsg_reduce_offsets_array(sm_module);
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
|
|
|
|
/* collective topology data */
|
|
|
|
if( sm_module->fanout_read_tree) {
|
|
|
|
for(i=0 ; i < sm_module->super.size_of_subgroup ; i++ ) {
|
|
|
|
if(0 < sm_module->fanout_read_tree[i].n_children ) {
|
|
|
|
free(sm_module->fanout_read_tree[i].children_ranks);
|
|
|
|
sm_module->fanout_read_tree[i].children_ranks=NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(sm_module->fanout_read_tree);
|
|
|
|
sm_module->fanout_read_tree=NULL;
|
|
|
|
}
|
2014-01-22 19:39:19 +04:00
|
|
|
|
|
|
|
/* gvm Leak FIX Reduction_tree[].children_ranks has
|
|
|
|
* to be removed. I don't how to get the size (which is
|
|
|
|
* size of subgroup) of array reduction_tree
|
|
|
|
*/
|
2012-08-16 23:11:35 +04:00
|
|
|
if( sm_module->reduction_tree) {
|
|
|
|
for(i=0 ; i < sm_module->super.size_of_subgroup ; i++ ) {
|
|
|
|
if(0 < sm_module->reduction_tree[i].n_children ) {
|
|
|
|
free(sm_module->reduction_tree[i].children_ranks);
|
|
|
|
sm_module->reduction_tree[i].children_ranks=NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(sm_module->reduction_tree);
|
|
|
|
sm_module->reduction_tree=NULL;
|
|
|
|
}
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/* gvm Leak FIX */
|
|
|
|
if (sm_module->fanout_node.children_ranks){
|
|
|
|
free(sm_module->fanout_node.children_ranks);
|
|
|
|
sm_module->fanout_node.children_ranks = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sm_module->fanin_node.children_ranks){
|
|
|
|
free(sm_module->fanin_node.children_ranks);
|
|
|
|
sm_module->fanin_node.children_ranks = NULL;
|
|
|
|
}
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* colls_no_user_data resrouces */
|
|
|
|
if(sm_module->colls_no_user_data.ctl_buffs_mgmt){
|
|
|
|
free(sm_module->colls_no_user_data.ctl_buffs_mgmt);
|
|
|
|
sm_module->colls_no_user_data.ctl_buffs_mgmt=NULL;
|
|
|
|
}
|
|
|
|
if(sm_module->colls_no_user_data.ctl_buffs){
|
|
|
|
free(sm_module->colls_no_user_data.ctl_buffs);
|
|
|
|
sm_module->colls_no_user_data.ctl_buffs=NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* colls_with_user_data resrouces */
|
2014-01-22 19:39:19 +04:00
|
|
|
/*
|
|
|
|
*debug print */
|
|
|
|
/*
|
|
|
|
fprintf(stderr,"AAA colls_with_user_data.ctl_buffs %p \n",
|
|
|
|
sm_module->colls_with_user_data.ctl_buffs_mgmt);
|
|
|
|
fflush(stderr);
|
|
|
|
end debug */
|
|
|
|
|
2012-08-16 23:11:35 +04:00
|
|
|
if(sm_module->colls_with_user_data.ctl_buffs_mgmt){
|
|
|
|
free(sm_module->colls_with_user_data.ctl_buffs_mgmt);
|
|
|
|
sm_module->colls_with_user_data.ctl_buffs_mgmt=NULL;
|
|
|
|
}
|
|
|
|
if(sm_module->colls_with_user_data.ctl_buffs){
|
|
|
|
free(sm_module->colls_with_user_data.ctl_buffs);
|
|
|
|
sm_module->colls_with_user_data.ctl_buffs=NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(sm_module->shared_memory_scratch_space) {
|
|
|
|
free(sm_module->shared_memory_scratch_space);
|
|
|
|
sm_module->shared_memory_scratch_space=NULL;
|
|
|
|
}
|
2014-01-22 19:39:19 +04:00
|
|
|
|
|
|
|
#if 1
|
2012-08-16 23:11:35 +04:00
|
|
|
if(sm_module->scatter_kary_tree) {
|
|
|
|
for(i=0 ; i < sm_module->super.size_of_subgroup ; i++ ) {
|
|
|
|
if(0 < sm_module->scatter_kary_tree[i].n_children) {
|
|
|
|
free(sm_module->scatter_kary_tree[i].children_ranks);
|
|
|
|
sm_module->scatter_kary_tree[i].children_ranks=NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(sm_module->scatter_kary_tree);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if(NULL != sm_module->super.list_n_connected ){
|
|
|
|
free(sm_module->super.list_n_connected);
|
|
|
|
sm_module->super.list_n_connected = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* free the k-nomial allgather tree here */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* done */
|
|
|
|
}
|
|
|
|
|
|
|
|
static void bcol_basesmuma_set_small_msg_thresholds(struct mca_bcol_base_module_t *super)
|
|
|
|
{
|
2014-01-22 19:39:19 +04:00
|
|
|
mca_bcol_basesmuma_module_t *basesmuma_module =
|
|
|
|
(mca_bcol_basesmuma_module_t *) super;
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
size_t basesmuma_offset = bcol_basesmuma_data_offset_calc(basesmuma_module);
|
|
|
|
|
|
|
|
/* Set the Allreduce threshold, for Basesmuma it equals to ML buffer size - data offset */
|
|
|
|
super->small_message_thresholds[BCOL_ALLREDUCE] =
|
2014-01-22 19:39:19 +04:00
|
|
|
basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset;
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* Set the Bcast threshold, for Basesmuma it equals to ML buffer size - data offset */
|
|
|
|
super->small_message_thresholds[BCOL_BCAST] =
|
2014-01-22 19:39:19 +04:00
|
|
|
basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset;
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* Set the Gather threshold, for Basesmuma it equals to ML buffer size - data offset */
|
|
|
|
super->small_message_thresholds[BCOL_GATHER] =
|
2014-01-22 19:39:19 +04:00
|
|
|
(basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset) /
|
|
|
|
ompi_comm_size(basesmuma_module->super.sbgp_partner_module->group_comm);
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* Set the ALLgather threshold, for Basesmuma it equals to ML buffer size - data offset */
|
|
|
|
super->small_message_thresholds[BCOL_ALLGATHER] =
|
2014-01-22 19:39:19 +04:00
|
|
|
(basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset) /
|
|
|
|
ompi_comm_size(basesmuma_module->super.sbgp_partner_module->group_comm);
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* Set the Reduce threshold, for Basesmuma it equals to ML buffer size - data offset */
|
|
|
|
super->small_message_thresholds[BCOL_REDUCE] =
|
2014-01-22 19:39:19 +04:00
|
|
|
basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset;
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* Set the Scatter threshold, for Basesmuma it equals to ML buffer size - data offset */
|
|
|
|
super->small_message_thresholds[BCOL_SCATTER] =
|
2014-01-22 19:39:19 +04:00
|
|
|
basesmuma_module->ml_mem.ml_mem_desc->size_buffer - basesmuma_offset;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/* setup memory management and collective routines */
|
|
|
|
|
|
|
|
/**
 * Install the collective entry points and memory-management hooks of the
 * basesmuma module.
 *
 * The function table is cleared first.  Bcast is served by the portals
 * scatter/allgather implementation when __PORTALS_AVAIL__ is defined and
 * by bcol_basesmuma_bcast otherwise; Allreduce uses recursive doubling
 * and Reduce uses the old fan-in algorithm.
 *
 * @param super  base module whose tables and hooks are filled in
 */
static void load_func(mca_bcol_base_module_t *super)
{
    int slot = 0;

    /* start from an empty dispatch table */
    while (slot < BCOL_NUM_OF_FUNCTIONS) {
        super->bcol_function_table[slot] = NULL;
        ++slot;
    }

    /* collective functions */
#ifdef __PORTALS_AVAIL__
    super->bcol_function_table[BCOL_BCAST] =
        bcol_basesmuma_lmsg_scatter_allgather_portals_bcast;
#else
    super->bcol_function_table[BCOL_BCAST] = bcol_basesmuma_bcast;
#endif
    super->bcol_function_table[BCOL_ALLREDUCE] =
        bcol_basesmuma_allreduce_intra_recursive_doubling;
    super->bcol_function_table[BCOL_REDUCE] =
        bcol_basesmuma_reduce_intra_fanin_old;

    /* memory management */
    super->bcol_memory_init = bcol_basesmuma_bank_init_opti;

    /* k-nomial tree setup hook */
    super->k_nomial_tree = bcol_basesmuma_setup_knomial_tree;

    /* small-message threshold hook */
    super->set_small_msg_thresholds = bcol_basesmuma_set_small_msg_thresholds;
}
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/**
 * Install the per-collective init routines and memory-management hooks
 * of the basesmuma module.
 *
 * The init table is cleared first, then one init routine is registered
 * for each collective this component provides.
 *
 * @param super  base module whose init table and hooks are filled in
 */
static void load_func_with_choices(mca_bcol_base_module_t *super)
{
    int slot = 0;

    /* start from an empty init table */
    while (slot < BCOL_NUM_OF_FUNCTIONS) {
        super->bcol_function_init_table[slot] = NULL;
        ++slot;
    }

    /* synchronisation collectives */
    super->bcol_function_init_table[BCOL_FANIN]   = bcol_basesmuma_fanin_init;
    super->bcol_function_init_table[BCOL_FANOUT]  = bcol_basesmuma_fanout_init;
    super->bcol_function_init_table[BCOL_BARRIER] = bcol_basesmuma_barrier_init;
    super->bcol_function_init_table[BCOL_SYNC]    = bcol_basesmuma_memsync_init;

    /* data collectives */
    super->bcol_function_init_table[BCOL_BCAST]     = bcol_basesmuma_bcast_init;
    super->bcol_function_init_table[BCOL_ALLREDUCE] = bcol_basesmuma_allreduce_init;
    super->bcol_function_init_table[BCOL_REDUCE]    = bcol_basesmuma_reduce_init;
    super->bcol_function_init_table[BCOL_GATHER]    = bcol_basesmuma_gather_init;
    super->bcol_function_init_table[BCOL_ALLGATHER] = bcol_basesmuma_allgather_init;

    /* memory management */
    super->bcol_memory_init = bcol_basesmuma_bank_init_opti;

    /* k-nomial tree setup hook */
    super->k_nomial_tree = bcol_basesmuma_setup_knomial_tree;
}
|
|
|
|
|
|
|
|
static int load_recursive_knomial_info(mca_bcol_basesmuma_module_t
|
2014-01-22 19:39:19 +04:00
|
|
|
*sm_module)
|
2012-08-16 23:11:35 +04:00
|
|
|
{
|
2014-01-22 19:39:19 +04:00
|
|
|
int rc = OMPI_SUCCESS;
|
|
|
|
rc = netpatterns_setup_recursive_knomial_tree_node(sm_module->super.sbgp_partner_module->group_size,
|
|
|
|
sm_module->super.sbgp_partner_module->my_index,
|
|
|
|
mca_bcol_basesmuma_component.k_nomial_radix,
|
|
|
|
&sm_module->knomial_exchange_tree);
|
|
|
|
return rc;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Build the recursive k-nomial allgather tree node of this module.
 *
 * Passes the subgroup size, this process's index in the subgroup, the
 * component's k_nomial_radix and the module's list_n_connected array to
 * netpatterns_setup_recursive_knomial_allgather_tree_node(), which fills
 * in the module's knomial_allgather_tree.
 *
 * @param super  base-module view of a mca_bcol_basesmuma_module_t
 * @return the status returned by the netpatterns setup routine
 */
int bcol_basesmuma_setup_knomial_tree(mca_bcol_base_module_t *super)
{
    mca_bcol_basesmuma_module_t *basesmuma = (mca_bcol_basesmuma_module_t *) super;
    mca_sbgp_base_module_t *sbgp = basesmuma->super.sbgp_partner_module;

    return netpatterns_setup_recursive_knomial_allgather_tree_node(
               sbgp->group_size,
               sbgp->my_index,
               mca_bcol_basesmuma_component.k_nomial_radix,
               super->list_n_connected,
               &basesmuma->knomial_allgather_tree);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* query to see if the module is available for use on the given
|
|
|
|
* communicator, and if so, what its priority is. This is where
|
|
|
|
* the backing shared-memory file is created.
|
|
|
|
*/
|
|
|
|
mca_bcol_base_module_t **
|
|
|
|
mca_bcol_basesmuma_comm_query(mca_sbgp_base_module_t *module, int *num_modules)
|
|
|
|
{
|
|
|
|
/* local variables */
|
|
|
|
mca_bcol_base_module_t **sm_modules = NULL;
|
|
|
|
mca_bcol_basesmuma_module_t *sm_module;
|
|
|
|
bcol_basesmuma_registration_data_t *sm_reg_data;
|
|
|
|
int ret, my_rank, name_length;
|
|
|
|
char *name;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
int bcast_radix;
|
|
|
|
|
|
|
|
mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
|
|
|
|
/*mca_base_component_list_item_t *hdl_cli = NULL;*/
|
|
|
|
/*int hdl_num;*/
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/* at this point I think there is only a sinle shared
|
2012-08-16 23:11:35 +04:00
|
|
|
memory bcol that we need to be concerned with */
|
|
|
|
|
|
|
|
/* No group, no modules */
|
|
|
|
if (OPAL_UNLIKELY(NULL == module)) {
|
2014-01-22 19:39:19 +04:00
|
|
|
return NULL;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* allocate and initialize an sm_bcol module */
|
|
|
|
sm_module = OBJ_NEW(mca_bcol_basesmuma_module_t);
|
|
|
|
|
|
|
|
/* set the subgroup */
|
|
|
|
sm_module->super.sbgp_partner_module=module;
|
|
|
|
|
|
|
|
(*num_modules)=1;
|
|
|
|
cs->super.n_net_contexts = *num_modules;
|
|
|
|
sm_modules = (mca_bcol_base_module_t **) malloc((cs->super.n_net_contexts)*
|
2014-01-22 19:39:19 +04:00
|
|
|
sizeof(mca_bcol_base_module_t *));
|
|
|
|
|
2012-08-16 23:11:35 +04:00
|
|
|
if( !sm_modules ) {
|
2014-01-22 19:39:19 +04:00
|
|
|
fprintf(stderr,"In base_bcol_masesmuma_setup_library_buffers failed to allocate memory for sm_modules\n");
|
|
|
|
fflush(stderr);
|
|
|
|
return NULL;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
sm_module->reduction_tree = NULL;
|
|
|
|
sm_module->fanout_read_tree = NULL;
|
|
|
|
|
2013-02-06 01:52:55 +04:00
|
|
|
ret=netpatterns_setup_recursive_doubling_tree_node(
|
2014-01-22 19:39:19 +04:00
|
|
|
module->group_size,module->my_index,
|
|
|
|
&(sm_module->recursive_doubling_tree));
|
2012-08-16 23:11:35 +04:00
|
|
|
if(OMPI_SUCCESS != ret) {
|
2014-01-22 19:39:19 +04:00
|
|
|
fprintf(stderr,"Error setting up recursive_doubling_tree \n");
|
|
|
|
fflush(stderr);
|
|
|
|
return NULL;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* setup the fanin tree - this is used only as part of a hierarchical
|
|
|
|
* barrier, so will set this up with rank 0 as the root */
|
|
|
|
my_rank=module->my_index;
|
2013-02-06 01:52:55 +04:00
|
|
|
ret=netpatterns_setup_narray_tree(cs->radix_fanin,
|
2014-01-22 19:39:19 +04:00
|
|
|
my_rank,module->group_size,&(sm_module->fanin_node));
|
2012-08-16 23:11:35 +04:00
|
|
|
if(OMPI_SUCCESS != ret) {
|
2014-01-22 19:39:19 +04:00
|
|
|
fprintf(stderr,"Error setting up fanin tree \n");
|
|
|
|
fflush(stderr);
|
|
|
|
return NULL;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* setup the fanout tree - this is used only as part of a hierarchical
|
|
|
|
* barrier, so will set this up with rank 0 as the root */
|
2013-02-06 01:52:55 +04:00
|
|
|
ret=netpatterns_setup_narray_tree(cs->radix_fanout,
|
2014-01-22 19:39:19 +04:00
|
|
|
my_rank,module->group_size,&(sm_module->fanout_node));
|
2012-08-16 23:11:35 +04:00
|
|
|
if(OMPI_SUCCESS != ret) {
|
2014-01-22 19:39:19 +04:00
|
|
|
fprintf(stderr,"Error setting up fanout tree \n");
|
|
|
|
fflush(stderr);
|
|
|
|
return NULL;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/*
|
2012-08-16 23:11:35 +04:00
|
|
|
* Setup the broadcast tree - this is used only as part of a hierarchical
|
|
|
|
* bcast, so will set this up with rank 0 as the root.
|
|
|
|
*/
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/* set the radix of the bcast tree */
|
|
|
|
bcast_radix = cs->radix_read_tree;
|
|
|
|
|
|
|
|
/* initialize fan-out read tree */
|
|
|
|
sm_module->fanout_read_tree=(netpatterns_tree_node_t*) malloc(
|
|
|
|
sizeof(netpatterns_tree_node_t)*module->group_size);
|
|
|
|
if( NULL == sm_module->fanout_read_tree ) {
|
|
|
|
goto Error;
|
|
|
|
}
|
|
|
|
|
|
|
|
for(i = 0; i < module->group_size; i++){
|
|
|
|
ret = netpatterns_setup_narray_tree(bcast_radix,
|
|
|
|
i, module->group_size, &(sm_module->fanout_read_tree[i]));
|
|
|
|
if(OMPI_SUCCESS != ret) {
|
|
|
|
goto Error;
|
|
|
|
}
|
|
|
|
}
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
ret = load_recursive_knomial_info(sm_module);
|
2012-08-16 23:11:35 +04:00
|
|
|
if (OMPI_SUCCESS != ret) {
|
2014-01-22 19:39:19 +04:00
|
|
|
BASESMUMA_VERBOSE(10, ("Failed to load recursive knomial tree"));
|
2012-08-16 23:11:35 +04:00
|
|
|
goto Error;
|
2014-01-22 19:39:19 +04:00
|
|
|
}
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/* Allocate offsets array for lmsg reduce */
|
|
|
|
ret = alloc_lmsg_reduce_offsets_array(sm_module);
|
|
|
|
if (OMPI_SUCCESS != ret) {
|
|
|
|
BASESMUMA_VERBOSE(10, ("Failed to allocate reduce offsets array"));
|
2012-08-16 23:11:35 +04:00
|
|
|
goto Error;
|
2014-01-22 19:39:19 +04:00
|
|
|
}
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* initialize reduction tree */
|
2013-02-06 01:52:55 +04:00
|
|
|
sm_module->reduction_tree=(netpatterns_tree_node_t *) malloc(
|
2014-01-22 19:39:19 +04:00
|
|
|
sizeof(netpatterns_tree_node_t )*module->group_size);
|
|
|
|
if( NULL == sm_module->reduction_tree ) {
|
2012-08-16 23:11:35 +04:00
|
|
|
goto Error;
|
|
|
|
}
|
2014-01-22 19:39:19 +04:00
|
|
|
|
2013-02-06 01:52:55 +04:00
|
|
|
ret=netpatterns_setup_multinomial_tree(
|
2014-01-22 19:39:19 +04:00
|
|
|
cs->order_reduction_tree,module->group_size,
|
|
|
|
sm_module->reduction_tree);
|
2012-08-16 23:11:35 +04:00
|
|
|
if( MPI_SUCCESS != ret ) {
|
|
|
|
goto Error;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get largest power of k for given group size */
|
|
|
|
sm_module->pow_k_levels = pow_sm_k(cs->k_nomial_radix,
|
2014-01-22 19:39:19 +04:00
|
|
|
sm_module->super.sbgp_partner_module->group_size,
|
|
|
|
&(sm_module->pow_k));
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* get largest power of 2 for a given group size
|
2014-01-22 19:39:19 +04:00
|
|
|
* used in scatter allgather
|
2012-08-16 23:11:35 +04:00
|
|
|
*/
|
|
|
|
sm_module->pow_2_levels = pow_sm_k(2,
|
2014-01-22 19:39:19 +04:00
|
|
|
sm_module->super.sbgp_partner_module->group_size,
|
|
|
|
&(sm_module->pow_2));
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* setup scatter data
|
|
|
|
*/
|
|
|
|
sm_module->scatter_kary_radix=cs->scatter_kary_radix;
|
|
|
|
sm_module->scatter_kary_tree=NULL;
|
2013-02-06 01:52:55 +04:00
|
|
|
ret=netpatterns_setup_narray_tree_contigous_ranks(
|
2014-01-22 19:39:19 +04:00
|
|
|
sm_module->scatter_kary_radix,
|
|
|
|
sm_module->super.sbgp_partner_module->group_size,
|
|
|
|
&(sm_module->scatter_kary_tree));
|
2012-08-16 23:11:35 +04:00
|
|
|
if(OMPI_SUCCESS != ret) {
|
2014-01-22 19:39:19 +04:00
|
|
|
fprintf(stderr,"In base_bcol_masesmuma_setup_library_buffers and scatter k-ary tree setup failed \n");
|
|
|
|
fflush(stderr);
|
|
|
|
return NULL;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* setup the module shared memory management */
|
|
|
|
ret=base_bcol_basesmuma_setup_library_buffers(sm_module, cs);
|
2014-01-22 19:39:19 +04:00
|
|
|
|
2012-08-16 23:11:35 +04:00
|
|
|
if(OMPI_SUCCESS != ret) {
|
2014-01-22 19:39:19 +04:00
|
|
|
fprintf(stderr,"In base_bcol_masesmuma_setup_library_buffers and mpool was not successfully setup!\n");
|
|
|
|
fflush(stderr);
|
|
|
|
return NULL;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* setup the collectives and memory management */
|
|
|
|
|
|
|
|
/* check to see whether or not the mpool has been inited */
|
|
|
|
/* allocate some space for the network contexts */
|
|
|
|
if(!cs->mpool_inited) {
|
2014-01-22 19:39:19 +04:00
|
|
|
/* if it's empty, then fill it for first time */
|
|
|
|
cs->super.network_contexts = (bcol_base_network_context_t **)
|
|
|
|
malloc((cs->super.n_net_contexts)*
|
|
|
|
sizeof(bcol_base_network_context_t *));
|
|
|
|
/* you need to do some basic setup - define the file name,
|
|
|
|
* set data seg alignment and size of cntl structure in sm
|
|
|
|
* file.
|
|
|
|
*/
|
|
|
|
/* give the payload sm file a name */
|
|
|
|
name_length=asprintf(&name,
|
|
|
|
"%s"OPAL_PATH_SEP"0%s%0d",
|
|
|
|
ompi_process_info.job_session_dir,
|
|
|
|
cs->payload_base_fname,
|
|
|
|
(int)getpid());
|
|
|
|
if( 0 > name_length ) {
|
|
|
|
fprintf(stderr,"Failed to assign the shared memory payload file a name\n");
|
|
|
|
fflush(stderr);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
/* make sure name is not too long */
|
|
|
|
if ( OPAL_PATH_MAX < (name_length-1) ) {
|
|
|
|
fprintf(stderr,"Shared memory file name is too long!\n");
|
|
|
|
fflush(stderr);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
/* set the name and alignment characteristics */
|
|
|
|
sm_reg_data = (bcol_basesmuma_registration_data_t *) malloc(
|
|
|
|
sizeof(bcol_basesmuma_registration_data_t));
|
|
|
|
sm_reg_data->file_name = name;
|
|
|
|
|
|
|
|
sm_reg_data->data_seg_alignment = getpagesize();
|
|
|
|
sm_reg_data->size_ctl_structure = 0;
|
|
|
|
cs->super.network_contexts[0] = (bcol_base_network_context_t *)
|
|
|
|
malloc(sizeof(bcol_base_network_context_t));
|
|
|
|
cs->super.network_contexts[0]->context_data =
|
|
|
|
(void *) sm_reg_data;
|
|
|
|
cs->super.network_contexts[0]->
|
|
|
|
register_memory_fn = mca_bcol_basesmuma_register_sm;
|
|
|
|
cs->super.network_contexts[0]->
|
|
|
|
deregister_memory_fn = mca_bcol_basesmuma_deregister_sm;
|
|
|
|
sm_module->super.network_context = cs->super.network_contexts[0];
|
2012-08-16 23:11:35 +04:00
|
|
|
} else {
|
2014-01-22 19:39:19 +04:00
|
|
|
|
|
|
|
sm_module->super.network_context = cs->super.network_contexts[0];
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Set the header size */
|
|
|
|
sm_module->super.header_size = sizeof(mca_bcol_basesmuma_header_t);
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/*initialize the hdl module if it's to be enabled*/
|
|
|
|
#if 0
|
|
|
|
if (module->use_hdl) {
|
|
|
|
sm_module->super.use_hdl = module->use_hdl;
|
|
|
|
hdl_cli = (mca_base_component_list_item_t *)
|
|
|
|
opal_list_get_first(&mca_hdl_base_components_in_use);
|
|
|
|
sm_module->hdl_module = ((mca_hdl_base_component_t*)
|
|
|
|
hdl_cli->cli_component)->hdl_comm_query(sm_module, &hdl_num);
|
|
|
|
if (1 != hdl_num || sm_module->hdl_module == NULL) {
|
|
|
|
ML_ERROR(("hdl modules are not successfully initialized!\n"));
|
|
|
|
goto Error;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
sm_module->hdl_module = NULL;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
sm_module->hdl_module = NULL;
|
|
|
|
#endif
|
|
|
|
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* collective setup */
|
2014-01-22 19:39:19 +04:00
|
|
|
load_func(&(sm_module->super));
|
2012-08-16 23:11:35 +04:00
|
|
|
load_func_with_choices(&(sm_module->super));
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/*
|
|
|
|
* This initializes all collective algorithms
|
|
|
|
*/
|
|
|
|
|
|
|
|
ret = mca_bcol_base_bcol_fns_table_init(&(sm_module->super));
|
|
|
|
|
|
|
|
if (OMPI_SUCCESS != ret) {
|
|
|
|
|
|
|
|
goto Error;
|
|
|
|
}
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
sm_module->super.supported_mode = 0;
|
|
|
|
|
2013-08-27 20:36:54 +04:00
|
|
|
/* NTH: this is not set anywhere on the trunk as of 08/13/13 */
|
|
|
|
#if 0
|
2012-08-16 23:11:35 +04:00
|
|
|
if (module->use_hdl) {
|
|
|
|
sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY;
|
|
|
|
}
|
2013-08-27 20:36:54 +04:00
|
|
|
#endif
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/* Initializes portals library required for basesmuma large message */
|
2012-08-16 23:11:35 +04:00
|
|
|
#ifdef __PORTALS_AVAIL__
|
|
|
|
/* Enable zero copy mode */
|
|
|
|
sm_module->super.supported_mode = MCA_BCOL_BASE_ZERO_COPY;
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
ret = mca_bcol_basesmuma_portals_init(cs);
|
|
|
|
if (OMPI_SUCCESS != ret) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
sm_module->sg_state.phase = INIT;
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
ret = PtlEQAlloc(((mca_bcol_basesmuma_portal_proc_info_t*)
|
|
|
|
cs->portals_info)->ni_h, MAX_PORTAL_EVENTS_IN_Q,
|
|
|
|
PTL_EQ_HANDLER_NONE, &sm_module->sg_state.read_eq);
|
2012-08-16 23:11:35 +04:00
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
if (ret != PTL_OK) {
|
|
|
|
BASESMUMA_VERBOSE(10,( "PtlEQAlloc() failed: %d",ret));
|
|
|
|
return NULL;
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
#endif
|
2012-08-16 23:11:35 +04:00
|
|
|
/* blocking recursive double barrier test */
|
|
|
|
/*
|
2014-01-22 19:39:19 +04:00
|
|
|
{
|
|
|
|
fprintf(stderr,"BBB About to hit the barrier test\n");
|
|
|
|
fflush(stderr);
|
|
|
|
int rc;
|
|
|
|
bcol_function_args_t bogus;
|
|
|
|
rc = bcol_basesmuma_rd_barrier_init(&(sm_module->super));
|
|
|
|
rc = bcol_basesmuma_recursive_double_barrier(
|
|
|
|
&bogus, &(sm_module->super));
|
|
|
|
}
|
2012-08-16 23:11:35 +04:00
|
|
|
*/
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
/* in this case we only expect a single network context.
|
2012-08-16 23:11:35 +04:00
|
|
|
in the future we should loop around this */
|
|
|
|
sm_modules[0] = &(sm_module->super);
|
|
|
|
|
|
|
|
#if 0
|
2014-01-22 19:39:19 +04:00
|
|
|
/* debug */
|
|
|
|
/* test resource recycling */
|
|
|
|
test_sm_module=sm_module;
|
|
|
|
/* debug */
|
|
|
|
fprintf(stderr," ZZZZ sn %lld \n",sm_module->squence_number_offset);
|
|
|
|
fflush(stderr);
|
|
|
|
/* end debug */
|
|
|
|
test_resrouce_recycle();
|
|
|
|
|
|
|
|
/* end debug */
|
2012-08-16 23:11:35 +04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
return sm_modules;
|
|
|
|
|
2014-01-22 19:39:19 +04:00
|
|
|
Error:
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* cleanup */
|
|
|
|
if( sm_module->reduction_tree ) {
|
|
|
|
free(sm_module->reduction_tree);
|
|
|
|
sm_module->reduction_tree=NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if 0
/*
 * Init module on the communicator.
 *
 * Currently compiled out.  Only emits a verbose (level 10) message naming
 * the communicator and its context id; the module argument is not used.
 * Always returns OMPI_SUCCESS.
 */
static int
basesmuma_module_enable(mca_bcol_base_module_t *module,
                        struct ompi_communicator_t *comm)
{
    /* zero-filled scratch buffer for "<name> (cid <id>)" */
    char comm_desc[2*MPI_MAX_OBJECT_NAME] = {0};

    snprintf(comm_desc, sizeof(comm_desc), "%s (cid %d)",
             comm->c_name, comm->c_contextid);

    opal_output_verbose(10, ompi_bcol_base_framework.framework_output,
                        "bcol:basesmuma:enable: new communicator: %s",
                        comm_desc);

    /* All done */
    return OMPI_SUCCESS;
}
#endif
|
|
|
|
|
|
|
|
/*
 * Class instance for mca_bcol_basesmuma_module_t (the type created with
 * OBJ_NEW in mca_bcol_basesmuma_comm_query), declared with
 * mca_bcol_base_module_t as its parent and the module construct/destruct
 * routines of this file.
 */
OBJ_CLASS_INSTANCE(mca_bcol_basesmuma_module_t, mca_bcol_base_module_t,
                   mca_bcol_basesmuma_module_construct,
                   mca_bcol_basesmuma_module_destruct);
|