/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/**
 * @file
 *
 * Most of the description of the data layout is in the
 * coll_sm_module.c file.
 */

#include "ompi_config.h"

#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>

#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "opal/util/show_help.h"
#include "coll_sm2.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/dpm/dpm.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/proc_info.h"

/*
 * Local functions
 */
static int sm2_module_enable(mca_coll_base_module_t *module,
        struct ompi_communicator_t *comm);

#if 0
/* debug */
extern int debug_print;
extern int my_debug_rank;
extern int my_debug_comm_size;
extern void debug_module(void);
extern int last_root;
extern int node_type;
long long free_buff_free_index=-1;
static mca_coll_sm2_module_t *module_dbg;
static int blocking_cnt=0;
void debug_module(void) {
    int i,j,k;
    char *ptr;
    int barrier_index,index;
    long long br_tag;

    mca_coll_sm2_nb_request_process_shared_mem_t * ctl_ptr;
    /* control regions */
    if ( 0 == my_debug_rank ) {
        for( i=0 ; i < 2 ; i++ ) {
            for( j=0 ; j < 2 ; j++ ) {
                fprintf(stderr," bank %d index %d \n", i,j);
                for( k=0 ; k < my_debug_comm_size ; k++ ) {
                    ctl_ptr=module_dbg->barrier_request[i].barrier_base_address[j];
                    ctl_ptr=(mca_coll_sm2_nb_request_process_shared_mem_t *) (
                        (char *)ctl_ptr+k*module_dbg->sm2_size_management_region_per_proc
                        );
                    fprintf(stderr," bank %d index %d flag %lld \n",
                            i,j,ctl_ptr->flag);
                }
            }
        }
    }
    /* data regions */

    fprintf(stderr," my_debug_rank %d current index %d freed index %d coll_tag %lld debug stat %d blocking_cnt %d last_root %d free_buff_free_index %lld node_type %d \n",
            my_debug_rank,
            module_dbg->sm2_allocated_buffer_index,module_dbg->sm2_freed_buffer_index,
            module_dbg->collective_tag,
            module_dbg->blocked_on_barrier,blocking_cnt,last_root,
            free_buff_free_index,node_type);

    barrier_index=(module_dbg->num_nb_barriers_completed%
            module_dbg->sm2_module_num_memory_banks);
    index=module_dbg->barrier_request[barrier_index].sm_index;
    fprintf(stderr," my_debug_rank %d started %lld completed %lld bank %d index %d br_tag %lld \n",
            my_debug_rank,
            module_dbg->num_nb_barriers_started,
            module_dbg->num_nb_barriers_completed,
            barrier_index,index,
            module_dbg->barrier_request[barrier_index].tag);
    fprintf(stderr," my_debug_rank %d barrier_bank_cntr %lld ",
            my_debug_rank,module_dbg->barrier_bank_cntr);
    for( i=0 ; i < BARRIER_BANK_LIST_SIZE ; i++ )
        fprintf(stderr,"%2d",module_dbg->barrier_bank_list[i]);
    fprintf(stderr," \n");
    if( 0 == my_debug_rank ) {
        for( i=0 ; i < module_dbg->sm2_module_num_buffers ; i++ ) {
            for( j=0 ; j < my_debug_comm_size ; j++ ) {
                fprintf(stderr," buffer index %d tag %lld ptr %p \n",
                        i,
                        module_dbg->sm_buffer_descriptor[i].proc_memory[j].control_region->flag,
                        module_dbg->sm_buffer_descriptor[i].proc_memory[j].control_region);
            }
        }
    }

    fflush(stderr);
    return;

}
/* end debug */
#endif

/*
 * Local functions
 */
static void
mca_coll_sm2_module_construct(mca_coll_sm2_module_t *module)
{
}

static void
mca_coll_sm2_module_destruct(mca_coll_sm2_module_t *module)
{
    int i,ret;
    /* free the mmaped shared file */
    if( module->shared_memory_region) {
        ret=munmap(module->shared_memory_region,
                module->size_sm2_backing_file);
        /* this is cleanup, no recovery will be done */
    }

    /* free list of children in the barrier-tree */
    if( NULL != module->sm_buffer_mgmt_barrier_tree.children_ranks ) {
        free(module->sm_buffer_mgmt_barrier_tree.children_ranks);
    }

    /* free non-blocking barrier request objects */
    if( NULL != module->barrier_request ) {
        free(module->barrier_request);
    }

    /* free reduction tree */
    if( NULL != module->reduction_tree ) {
        for( i=0 ; i < module->comm_size ; i++ ) {
            if( NULL != module->reduction_tree[i].children_ranks) {
                free(module->reduction_tree[i].children_ranks);
            }
        }
        free(module->reduction_tree);
    }

    /* free fan-out read tree */
    if( NULL != module->fanout_read_tree ) {
        for( i=0 ; i < module->comm_size ; i++ ) {
            if( NULL != module->fanout_read_tree[i].children_ranks) {
                free(module->fanout_read_tree[i].children_ranks);
            }
        }
        free(module->fanout_read_tree);
    }

    /* done */
}
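
/* Return true only when every process in the group resides on the local
 * node; the shared-memory collectives can only be used in that case.
 */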
static bool have_local_peers(ompi_group_t *group, size_t size)
{
    size_t i;
    ompi_proc_t *proc;

    for (i = 0; i < size; ++i) {
        proc = ompi_group_peer_lookup(group,i);
        if (!OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
            return false;
        }
    }
    return true;
}

/*
 * Create mmaped shared file
 */
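/* Rank 0 of the communicator creates the file: it builds a name from the
 * job session directory, the communicator id and its pid, creates, maps
 * and sizes the file, then sends the initialization status and the unique
 * id to every other local rank over the RML.  The remaining ranks block on
 * that message, rebuild the same file name from the received id, and open
 * and map the existing file.
 */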

static int allocate_shared_file(size_t size, char **file_name,
        struct ompi_communicator_t *comm, char **sm_backing_file)
{
    int fd = -1;
    int group_size,my_rank;
    int unique_comm_id;
    ssize_t len;
    char *f_name;

    bool i_create_shared_file=false;
    ssize_t p;
    int rc=0, sm_file_inited=0;
    struct iovec iov[3];
    int sm_file_created;
    ompi_proc_t **comm_proc_list;

    /* get the list of procs */
    comm_proc_list=comm->c_local_group->grp_proc_pointers;

    group_size=ompi_comm_size(comm);
    my_rank=ompi_comm_rank(comm);

    /* determine who will actually create the file */
    if( my_rank == 0 ) {
        i_create_shared_file=true;
    }

    /* open the backing file. */
    if( i_create_shared_file ) {
        /*
         * set file name
         */

        /* generate id that will be different for non-overlapping
         * communicators.
         */
        unique_comm_id=(int)getpid();
        len=asprintf(&f_name,
                "%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_process_info.job_session_dir,
                ompi_comm_get_cid(comm),unique_comm_id);
        if( 0 > len ) {
            return OMPI_ERROR;
        }
        *file_name=f_name;

        /* process initializing the file */
        fd = open(*file_name, O_CREAT|O_RDWR, 0600);
        if (fd < 0) {
            opal_output(0,"mca_common_sm_mmap_init: open %s len %ld failed with errno=%d\n",
                    *file_name, len, errno);
            goto file_opened;
        }
        /* map the file and initialize segment state */
        *sm_backing_file = (char *)
            mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if( MAP_FAILED == (void *)*sm_backing_file ) {
            opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n",
                    errno);
            goto file_opened;
        }

        /* truncate the file to the requested size */
        if(ftruncate(fd, size) != 0) {
            opal_output(0,
                    "mca_common_sm_mmap_init: ftruncate failed with errno=%d\n",
                    errno);
            goto file_opened;
        }

        /* if we got this far, the file has been initialized correctly */
        sm_file_inited=1;

    file_opened:

        /* signal the rest of the local procs that the backing file
         * has been created - not very scalable, but for small shared
         * memory nodes is adequate for now
         */
        for(p=1 ; p < group_size ; p++ ) {
            sm_file_created=OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED;
            iov[0].iov_base=&sm_file_created;
            iov[0].iov_len=sizeof(sm_file_created);
            iov[1].iov_base=&sm_file_inited;
            iov[1].iov_len=sizeof(sm_file_inited);
            iov[2].iov_base=&unique_comm_id;
            iov[2].iov_len=sizeof(unique_comm_id);
            rc=orte_rml.send(&(comm_proc_list[p]->proc_name),iov,3,
                    OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0);
            if( rc < 0 ) {
                opal_output(0,
                        "allocate_shared_file: orte_rml.send failed to %lu with errno=%d\n",
                        (unsigned long)p, errno);
                goto return_error;
            }
        }
        if ( 0 == sm_file_inited ) {
            /* error - the sm backing file did not get opened correctly */
            goto return_error;
        }
    } else {
        /* all other procs wait for the file to be initialized
           before using the backing file */
        iov[0].iov_base=&sm_file_created;
        iov[0].iov_len=sizeof(sm_file_created);
        iov[1].iov_base=&sm_file_inited;
        iov[1].iov_len=sizeof(sm_file_inited);
        iov[2].iov_base=&unique_comm_id;
        iov[2].iov_len=sizeof(unique_comm_id);
        rc=orte_rml.recv(&(comm_proc_list[0]->proc_name),iov,3,
                OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0);
        if( rc < 0 ) {
            opal_output(0, "allocate_shared_file: orte_rml.recv failed from %ld with errno=%d\n",
                    0L, errno);
            goto return_error;
        }
        /* check to see if file inited correctly */
        if( 0 == sm_file_inited ) {
            goto return_error;
        }
        /* set file name - we need the unique id for non-overlapping
         * communicators, that could have the same communicator id
         */
        len=asprintf(&f_name,
                "%s"OPAL_PATH_SEP"sm_coll_v2_%0d_%0d",orte_process_info.job_session_dir,
                ompi_comm_get_cid(comm),unique_comm_id);
        if( 0 > len ) {
            return OMPI_ERROR;
        }
        *file_name=f_name;

        /* open backing file */
        fd = open(*file_name, O_RDWR, 0600);
        if (fd < 0) {
            opal_output(0,"mca_common_sm_mmap_init: open %s len %ld failed with errno=%d\n",
                    *file_name, len, errno);
            goto return_error;
        }

        /* map the file and initialize segment state */
        *sm_backing_file = (char *)
            mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
        if( MAP_FAILED == (void *)*sm_backing_file ) {
            opal_output(0, "mca_common_sm_mmap_init: mmap failed with errno=%d\n",
                    errno);
            goto return_error;
        }

    }

    /* enable access by other processes on this host */
    close(fd);

    return OMPI_SUCCESS;

return_error:
    if( -1 != fd ) {
        close(fd);
    }

    if( NULL != *sm_backing_file && MAP_FAILED != (void *)*sm_backing_file ) {
        munmap((void*) *sm_backing_file,size);
    }

    return OMPI_ERROR;

}
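
/* Setup-time barrier implemented with RML messages over the given
 * multinomial tree: fan-in from the leaves to the root, then fan-out back
 * down.  It is used at the end of module setup so that no process touches
 * the shared-memory region before all local processes have finished
 * initializing it.
 */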
static
int barrier( struct ompi_communicator_t *comm ,
        tree_node_t *multinomial_tree)
{
    int group_size,my_rank,n_children,child,n_parents,my_fanout_parent;
    int child_rank, dummy;
    tree_node_t *my_node;
    int rc=0;
    struct iovec iov;
    ompi_proc_t **comm_proc_list;

    /* get the list of procs */
    comm_proc_list=comm->c_local_group->grp_proc_pointers;

    group_size=ompi_comm_size(comm);
    my_rank=ompi_comm_rank(comm);
    my_node=&(multinomial_tree[my_rank]);
    n_children=my_node->n_children;
    n_parents=my_node->n_parents;
    my_fanout_parent=my_node->parent_rank;

    /*
     * fan in
     */
    /* receive from the children */
    for( child=0 ; child < n_children ; child++ ) {
        child_rank=my_node->children_ranks[child];
        iov.iov_base=&dummy;
        iov.iov_len=sizeof(dummy);
        rc=orte_rml.recv(&(comm_proc_list[child_rank]->proc_name),&iov,1,
                OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0);
        if( rc < 0 ) {
            opal_output(0,
                    "sm barrier fan-in: orte_rml.recv failed to %lu with errno=%d\n",
                    (unsigned long)child_rank, errno);
            goto return_error;
        }
    }
    /* send to parent */
    if( 0 < n_parents ) {
        iov.iov_base=&dummy;
        iov.iov_len=sizeof(dummy);
        rc=orte_rml.send(&(comm_proc_list[my_fanout_parent]->proc_name),&iov,1,
                OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0);
        if( rc < 0 ) {
            opal_output(0,
                    "sm barrier fan-in: orte_rml.send failed to %lu with errno=%d\n",
                    (unsigned long)my_fanout_parent, errno);
            goto return_error;
        }
    }

    /*
     * Fan out
     */
    /* receive from parent */
    if( 0 < n_parents ) {
        iov.iov_base=&dummy;
        iov.iov_len=sizeof(dummy);
        rc=orte_rml.recv(&(comm_proc_list[my_fanout_parent]->proc_name),&iov,1,
                OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0);
        if( rc < 0 ) {
            opal_output(0,
                    "sm barrier fan-out: orte_rml.recv failed to %lu with errno=%d\n",
                    (unsigned long)my_fanout_parent, errno);
            goto return_error;
        }
    }

    /* send to children */
    for( child=0 ; child < n_children ; child++ ) {
        child_rank=my_node->children_ranks[child];
        iov.iov_base=&dummy;
        iov.iov_len=sizeof(dummy);
        rc=orte_rml.send(&(comm_proc_list[child_rank]->proc_name),&iov,1,
                OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0);
        if( rc < 0 ) {
            opal_output(0,
                    "sm barrier fan-out: orte_rml.send failed to %lu with errno=%d\n",
                    (unsigned long)child_rank, errno);
            goto return_error;
        }
    }

    return OMPI_SUCCESS;

return_error:

    return OMPI_ERROR;

}

/* setup an n-array tree */
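/* Example: with tree_order=2 and num_nodes=7 the ranks are laid out by
 * level as {0}, {1,2}, {3,4,5,6}: rank 0 is the root with children 1 and 2,
 * rank 1 has children 3 and 4, rank 2 has children 5 and 6, and ranks 3-6
 * are leaves.
 */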
static int setup_nary_tree(int tree_order, int my_rank, int num_nodes,
        tree_node_t *my_node)
{
    /* local variables */
    int n_levels, result;
    int my_level_in_tree, cnt, parent_cnt;
    int lvl,cum_cnt, my_rank_in_my_level,n_lvls_in_tree;
    int start_index,end_index;

    /* sanity check */
    if( 1 >= tree_order ) {
        goto Error;
    }

    my_node->my_rank=my_rank;
    my_node->tree_size=num_nodes;

    /* figure out number of levels in tree */
    n_levels=0;
    result=num_nodes-1;
    while (0 < result ) {
        result/=tree_order;
        n_levels++;
    };

    /* figure out who my children and parents are */
    my_level_in_tree=-1;
    result=my_rank;
    /* cnt - number of ranks in given level */
    cnt=1;
    /* parent_cnt - cumulative count of ranks */
    parent_cnt=0;
    while( 0 <= result ) {
        result-=cnt;
        cnt*=tree_order;
        my_level_in_tree++;
    };
    /* int my_level_in_tree, n_children, n_parents; */

    if( 0 == my_rank ) {
        my_node->n_parents=0;
        my_node->parent_rank=-1;
        my_rank_in_my_level=0;
    } else {
        my_node->n_parents=1;
        cnt=1;
        cum_cnt=0;
        for (lvl = 0 ; lvl < my_level_in_tree ; lvl ++ ) {
            /* cumulative count up to this level */
            cum_cnt+=cnt;
            /* number of ranks in this level */
            cnt*=tree_order;
        }
        my_rank_in_my_level=my_rank-cum_cnt;
        /* tree_order consecutive ranks have the same parent */
        my_node->parent_rank=cum_cnt-cnt/tree_order+my_rank_in_my_level/tree_order;
    }

    /* figure out number of levels in the tree */
    n_lvls_in_tree=0;
    result=num_nodes;
    /* cnt - number of ranks in given level */
    cnt=1;
    /* parent_cnt - cumulative count of ranks */
    parent_cnt=0;
    while( 0 < result ) {
        result-=cnt;
        cnt*=tree_order;
        n_lvls_in_tree++;
    };

    my_node->children_ranks=(int *)NULL;

    /* get list of children */
    if( my_level_in_tree == (n_lvls_in_tree -1 ) ) {
        /* last level has no children */
        my_node->n_children=0;
    } else {
        cum_cnt=0;
        cnt=1;
        for( lvl=0 ; lvl <= my_level_in_tree ; lvl++ ) {
            cum_cnt+=cnt;
            cnt*=tree_order;
        }
        start_index=cum_cnt+my_rank_in_my_level*tree_order;
        end_index=start_index+tree_order-1;

        /* don't go out of bounds at the end of the list */
        if( end_index >= num_nodes ) {
            end_index = num_nodes-1;
        }

        if( start_index <= (num_nodes-1) ) {
            my_node->n_children=end_index-start_index+1;
        } else {
            my_node->n_children=0;
        }

        my_node->children_ranks=NULL;
        if( 0 < my_node->n_children ) {
            my_node->children_ranks=
                (int *)malloc( sizeof(int)*my_node->n_children);
            if( NULL == my_node->children_ranks) {
                goto Error;
            }
            for (lvl= start_index ; lvl <= end_index ; lvl++ ) {
                my_node->children_ranks[lvl-start_index]=lvl;
            }
        }
    }
    /* set node type */
    if( 0 == my_node->n_parents ) {
        my_node->my_node_type=ROOT_NODE;
    } else if ( 0 == my_node->n_children ) {
        my_node->my_node_type=LEAF_NODE;
    } else {
        my_node->my_node_type=INTERIOR_NODE;
    }

    /* successful return */
    return OMPI_SUCCESS;

Error:

    /* error return */
    return OMPI_ERROR;
}

/* initialize barrier structures */
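/* Set up the non-blocking barriers used to recycle the shared-memory
 * banks: build the fan-in/fan-out tree, allocate one barrier request
 * object per memory bank, and point each request at its two shared-memory
 * control regions, in which every process owns one flag (spaced by the
 * per-process management region size) that is initialized to zero.
 */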
static int init_sm2_barrier(struct ompi_communicator_t *comm,
        mca_coll_sm2_component_t *component,
        mca_coll_sm2_module_t *module) {

    /*local variables */
    int i,j,k,comm_size, my_rank, tree_order, rc;
    mca_coll_sm2_nb_request_process_shared_mem_t *sm_address;

    /* get order of fan-in and fan-out tree */
    tree_order=component->order_barrier_tree;

    /* get communicator size */
    comm_size=ompi_comm_size(comm);

    /* get rank within communicator */
    my_rank=ompi_comm_rank(comm);

    /* initialize fan-in/fan-out tree */
    rc=setup_nary_tree(tree_order, my_rank, comm_size,
            &(module->sm_buffer_mgmt_barrier_tree));
    if( OMPI_SUCCESS != rc ) {
        goto Error;
    }

    /* Allocate barrier control structures - allocating one barrier structure
     * per memory bank.  Allocating two shared memory regions per bank. */
    module->barrier_request=(mca_coll_sm2_nb_request_process_private_mem_t *)
        malloc(sizeof(mca_coll_sm2_nb_request_process_private_mem_t) *
                component->sm2_num_mem_banks);
    if( NULL == module->barrier_request ){
        rc=OMPI_ERROR;
        goto Error;
    }

    module->nb_barrier_tag=0;
    /* initialize barrier control structures */
    for(i=0 ; i < component->sm2_num_mem_banks ; i++ ) {

        module->barrier_request[i].tag=0;
        module->barrier_request[i].sm_index=0;
        module->barrier_request[i].sm2_barrier_phase=NB_BARRIER_INACTIVE;

        /* set the base address of each barrier's shared memory regions */
        for( j =0 ; j < 2 ; j++ ) {
            module->barrier_request[i].barrier_base_address[j]=
                (mca_coll_sm2_nb_request_process_shared_mem_t *)
                (module->shared_memory_region +
                 /* there are 2 barrier structs per bank */
                 (2*i+j)*CACHE_LINE_SIZE);
            /* initialize per-process flags */
            for(k=0 ; k < comm_size ; k++ ) {
                sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
                    ((char *)
                     (module->barrier_request[i].barrier_base_address[j])+
                     k*module->sm2_size_management_region_per_proc);
                sm_address->flag=0;
            }
        }
    }

    module->num_nb_barriers_started=0;
    module->num_nb_barriers_completed=0;

    /* set pointer to the collective operation buffers */
    module->collective_buffer_region=module->shared_memory_region+
        module->sm2_size_management_region_per_proc*
        module->sm_buffer_mgmt_barrier_tree.tree_size;

    /* set the pointer to the request that needs to be completed first */
    module->current_request_index=0;

    /* set starting collective tag */
    module->collective_tag=1;

    /* return - successful */
    return OMPI_SUCCESS;

Error:
    return rc;
}

/* query to see if the module is available for use on the given
 * communicator, and if so, what its priority is.  This is where
 * the backing shared-memory file is created.
 */
mca_coll_base_module_t *
mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    /* local variables */
    mca_coll_sm2_module_t *sm_module;
    int i,j,group_size,ret;
    size_t alignment,size;
    size_t tot_size_mem_banks;
    size_t ctl_memory_per_proc_per_segment;
    size_t mem_management_per_proc_per_block;
    size_t mem_management_per_proc;
    size_t mem_management_total;
    size_t size_sm2_backing_file;
    size_t size_buff_ctl_per_proc,size_data_buff_per_proc;

    /*
     * This is activated only for intra-communicators
     */
    if (OMPI_COMM_IS_INTER(comm) ) {
        return NULL;
    }

    /*
     * Use only if more than one proc in the communicator
     */
    if (1 == ompi_comm_size(comm) ) {
        return NULL;
    }

    /* check to see if all procs are on the same node, and therefore
     * can communicate using shared memory
     */
    if ( !have_local_peers(comm->c_local_group, ompi_comm_size(comm))) {
        return NULL;
    }

    /* Get our priority */
    *priority = mca_coll_sm2_component.sm2_priority;

    /* allocate and initialize an sm-v2 module */
    sm_module = OBJ_NEW(mca_coll_sm2_module_t);

    sm_module->super.coll_module_enable = sm2_module_enable;
    sm_module->super.ft_event = NULL;
    sm_module->super.coll_allgather = NULL;
    sm_module->super.coll_allgatherv = NULL;
    sm_module->super.coll_allreduce = mca_coll_sm2_allreduce_intra;
    sm_module->super.coll_alltoall = NULL;
    sm_module->super.coll_alltoallv = NULL;
    sm_module->super.coll_alltoallw = NULL;
    sm_module->super.coll_barrier = mca_coll_sm2_barrier_intra;
    sm_module->super.coll_bcast = mca_coll_sm2_bcast_intra;
    sm_module->super.coll_exscan = NULL;
    sm_module->super.coll_gather = NULL;
    sm_module->super.coll_gatherv = NULL;
    sm_module->super.coll_reduce = mca_coll_sm2_reduce_intra;
    sm_module->super.coll_reduce_scatter = NULL;
    sm_module->super.coll_scan = NULL;
    sm_module->super.coll_scatter = NULL;
    sm_module->super.coll_scatterv = NULL;

    /*
     * set up specific function to be used
     */

    /* barrier */
    sm_module->barrier_functions[FANIN_FAN_OUT_BARRIER_FN]=
        mca_coll_sm2_barrier_intra_fanin_fanout;
    sm_module->barrier_functions[RECURSIVE_DOUBLING_BARRIER_FN]=
        mca_coll_sm2_barrier_intra_fanin_fanout;
    if( ( 0 <= mca_coll_sm2_component.force_barrier ) &&
            ( N_BARRIER_FNS > mca_coll_sm2_component.force_barrier ) ) {
        /* set user specified function */
        mca_coll_base_module_barrier_fn_t tmp_fn=
            sm_module->barrier_functions[mca_coll_sm2_component.force_barrier];
        sm_module->barrier_functions[FANIN_FAN_OUT_BARRIER_FN]=tmp_fn;
        sm_module->barrier_functions[RECURSIVE_DOUBLING_BARRIER_FN]=tmp_fn;
    }

    /* reduce */
    sm_module->list_reduce_functions[FANIN_REDUCE_FN]=
        mca_coll_sm2_reduce_intra_fanin;
    sm_module->list_reduce_functions[REDUCE_SCATTER_GATHER_FN]=
        mca_coll_sm2_reduce_intra_reducescatter_gather;
    sm_module->reduce_functions[SHORT_DATA_FN_REDUCE]=
        sm_module->list_reduce_functions[FANIN_REDUCE_FN];
    sm_module->reduce_functions[LONG_DATA_FN_REDUCE]=
        sm_module->list_reduce_functions[REDUCE_SCATTER_GATHER_FN];
    if( ( 0 <= mca_coll_sm2_component.force_reduce ) &&
            ( N_REDUCE_FNS > mca_coll_sm2_component.force_reduce ) ) {
        /* set user specified function */
        mca_coll_base_module_reduce_fn_t tmp_fn=sm_module->
            list_reduce_functions[mca_coll_sm2_component.force_reduce];
        sm_module->reduce_functions[SHORT_DATA_FN_REDUCE]=tmp_fn;
        sm_module->reduce_functions[LONG_DATA_FN_REDUCE]=tmp_fn;
    }

    /* allreduce */
    sm_module->list_allreduce_functions[FANIN_FANOUT_ALLREDUCE_FN]=
        mca_coll_sm2_allreduce_intra_fanin_fanout;
    sm_module->list_allreduce_functions[REDUCE_SCATTER_ALLGATHER_FN]=
        mca_coll_sm2_allreduce_intra_reducescatter_allgather;
    sm_module->allreduce_functions[SHORT_DATA_FN_ALLREDUCE]=
        sm_module->list_allreduce_functions[FANIN_FANOUT_ALLREDUCE_FN];
    sm_module->allreduce_functions[LONG_DATA_FN_ALLREDUCE]=
        sm_module->list_allreduce_functions[REDUCE_SCATTER_ALLGATHER_FN];
    if( ( 0 <= mca_coll_sm2_component.force_allreduce ) &&
            ( N_ALLREDUCE_FNS > mca_coll_sm2_component.force_allreduce ) ) {
        /* set user specified function */
        mca_coll_base_module_allreduce_fn_t tmp_fn=sm_module->
            list_allreduce_functions[mca_coll_sm2_component.force_allreduce];
        sm_module->allreduce_functions[SHORT_DATA_FN_ALLREDUCE]=tmp_fn;
        sm_module->allreduce_functions[LONG_DATA_FN_ALLREDUCE]=tmp_fn;
    }

    /*
     * Some initialization
     */
    sm_module->reduction_tree=NULL;
    sm_module->fanout_read_tree=NULL;

    /*
     * create backing file
     */

    /*
     * set group size
     */
    group_size=ompi_comm_size(comm);

    sm_module->module_comm=comm;
    sm_module->comm_size=group_size;
    sm_module->n_poll_loops=mca_coll_sm2_component.n_poll_loops;

    /*
     * set memory region parameters
     */
    sm_module->sm2_module_num_memory_banks=
        mca_coll_sm2_component.sm2_num_mem_banks;
    sm_module->sm2_module_num_regions_per_bank=
        mca_coll_sm2_component.sm2_num_regions_per_bank;
    sm_module->sm2_module_num_buffers=
        mca_coll_sm2_component.sm2_num_regions_per_bank *
        mca_coll_sm2_component.sm2_num_mem_banks;

    /* allocate the array of memory descriptors used to describe the
     * shared memory buffers.  This structure resides in process
     * private memory, but describes the shared memory.
     */
    sm_module->sm_buffer_descriptor=(sm_work_buffer_t *)malloc(
            sizeof(sm_work_buffer_t)*sm_module->sm2_module_num_buffers);
    if( NULL == sm_module->sm_buffer_descriptor ) {
        goto CLEANUP;
    }

#if 0 /* data buffers and management buffers are allocated in a single
       * contiguous region */
    /*
     * Now figure out how much memory to allocate for use as
     * working memory for the shared memory collectives.
     */
    /*
     * get control region size
     */
    /* just enough place for two flags per process */
    ctl_memory_per_proc_per_segment=2*sizeof(long long);
    if( mca_coll_sm2_component.sm2_ctl_size_per_proc > ctl_memory_per_proc_per_segment )
        ctl_memory_per_proc_per_segment=mca_coll_sm2_component.sm2_ctl_size_per_proc;

    /* pad this up to the alignment needed by the data segment, as
     * that data segment will directly follow the control segment in
     * memory.
     */
    alignment=mca_coll_sm2_component.sm2_data_alignment;
    ctl_memory_per_proc_per_segment=
        (alignment + ctl_memory_per_proc_per_segment -1) / alignment;
    ctl_memory_per_proc_per_segment*=alignment;
    mca_coll_sm2_component.sm2_ctl_size_allocated=ctl_memory_per_proc_per_segment;
    sm_module->ctl_memory_per_proc_per_segment=ctl_memory_per_proc_per_segment;

    /* get data region size - allocation happens on a page granularity, with
     * a minimum of a page allocated per proc, so adjust to this
     */
    size=mca_coll_sm2_component.sm2_data_seg_size;
    if( size > mca_coll_sm2_component.sm2_max_data_seg_size )
        size=mca_coll_sm2_component.sm2_max_data_seg_size;
    size_tot_per_proc_per_seg=size+ mca_coll_sm2_component.sm2_ctl_size_per_proc;
    if( size_tot_per_proc_per_seg < getpagesize())
        size_tot_per_proc_per_seg=getpagesize();
    /* round this up to the nearest integer page-size multiple */
    size_tot_per_proc_per_seg= ( size_tot_per_proc_per_seg + getpagesize() - 1)/
        getpagesize();
    size_tot_per_proc_per_seg*=getpagesize();

    /* compute segment memory needed */
    size_tot_per_segment=group_size * size_tot_per_proc_per_seg ;

    sm_module->segement_size_per_process=size_tot_per_proc_per_seg;
    sm_module->segment_size=size_tot_per_segment;
    sm_module->data_memory_per_proc_per_segment=size_tot_per_proc_per_seg-
        ctl_memory_per_proc_per_segment;

    /* compute memory per bank */
    tot_size_per_bank=size_tot_per_segment*mca_coll_sm2_component.sm2_num_regions_per_bank;

    /* compute total memory in the memory banks */
    tot_size_mem_banks=tot_size_per_bank*mca_coll_sm2_component.sm2_num_mem_banks;
    sm_module->data_memory_per_proc_per_segment=size_tot_per_proc_per_seg-
        ctl_memory_per_proc_per_segment;

#endif

    /* management structures are allocated in one segment, and data buffers
     * in a separate segment
     */
    /*
     * Now figure out how much memory to allocate for use as
     * working memory for the shared memory collectives.
     */
    /*
     * get control region size
     */
    /* just enough place for two flags per process */
    ctl_memory_per_proc_per_segment=2*sizeof(long long);
    if( mca_coll_sm2_component.sm2_ctl_size_per_proc > ctl_memory_per_proc_per_segment )
        ctl_memory_per_proc_per_segment=mca_coll_sm2_component.sm2_ctl_size_per_proc;

    /* pad this up to the alignment needed by the data segment, as
     * that data segment will directly follow the control segment in
     * memory.
     */
    alignment=mca_coll_sm2_component.sm2_data_alignment;
    ctl_memory_per_proc_per_segment=
        (alignment + ctl_memory_per_proc_per_segment -1) / alignment;
    ctl_memory_per_proc_per_segment*=alignment;
    mca_coll_sm2_component.sm2_ctl_size_allocated=ctl_memory_per_proc_per_segment;
    sm_module->ctl_memory_per_proc_per_segment=ctl_memory_per_proc_per_segment;

    /* get data region size - allocation happens on a page granularity, with
     * a minimum of a page allocated per proc, so adjust to this
     */
    size=mca_coll_sm2_component.sm2_data_seg_size;
    if( size < getpagesize() )
        size=getpagesize();
    if( size > mca_coll_sm2_component.sm2_max_data_seg_size )
        size=mca_coll_sm2_component.sm2_max_data_seg_size;
    size= ( size + getpagesize() - 1)/getpagesize();
    size*=getpagesize();
    sm_module->segment_size=size*group_size;
    size_data_buff_per_proc=size;

    /* compute size of management region - per proc */
    size_buff_ctl_per_proc=
        ctl_memory_per_proc_per_segment*sm_module->sm2_module_num_buffers;
    size_buff_ctl_per_proc= ( size_buff_ctl_per_proc + getpagesize() - 1)/
        getpagesize();
    size_buff_ctl_per_proc*=getpagesize();

    tot_size_mem_banks=
        /* size of buffer control region */
        size_buff_ctl_per_proc*group_size+
        /* size of data buffers */
        size*sm_module->sm2_module_num_buffers*group_size;
    sm_module->size_of_collective_buffer_region=tot_size_mem_banks;
    sm_module->data_memory_per_proc_per_segment=size;

    /*
     * compute the amount of memory needed for the asynchronous barriers used to
     * manage the memory resources.
     */
    /* for each bank, 2 sets of barrier buffers */
    mem_management_per_proc_per_block= 2 * CACHE_LINE_SIZE ;
    /* add in number of banks */
    mem_management_per_proc= mem_management_per_proc_per_block *
        mca_coll_sm2_component.sm2_num_mem_banks;
    /* round up to page multiples */
    mem_management_per_proc=(mem_management_per_proc +
            getpagesize() -1 ) / getpagesize();
    mem_management_per_proc*=getpagesize();

    /* size of memory region, per process, for memory bank management */
    sm_module->sm2_size_management_region_per_proc=
        mem_management_per_proc;

    /* total memory management required */
    mem_management_total=mem_management_per_proc * group_size;
    sm_module->size_mem_banks_ctl_region=mem_management_total;

    /*
     * Memory for blocking collectives - need two sets of memory
     * regions for this.
     */
    /* size per proc */
    size=2*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t);
    /* page align */
    size=(size +
            getpagesize() -1 ) / getpagesize();
    size*=getpagesize();
    sm_module->per_proc_size_of_blocking_barrier_region=size;
    sm_module->size_of_blocking_barrier_region=size*group_size;

    /* total size of backing file - this assumes the mmap allocation
     * occurs on page boundaries, and that all segments are page
     * aligned
     */
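    /* The backing file holds three consecutive regions: the memory-bank
     * management (barrier) region, the collective buffer region, and the
     * blocking-barrier region; their sizes are summed below.
     */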
    size_sm2_backing_file=sm_module->size_mem_banks_ctl_region+
        sm_module->size_of_collective_buffer_region+
        sm_module->size_of_blocking_barrier_region;
    sm_module->size_sm2_backing_file=size_sm2_backing_file;

    /* set file name */
    /*
    len=asprintf(&(sm_module->coll_sm2_file_name),
            "%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d\0",orte_process_info.job_session_dir,
            orte_process_info.nodename,ompi_comm_get_cid(comm));
    if( 0 > len ) {
        goto CLEANUP;
    }
    */

    /* allocate backing file */
    ret=allocate_shared_file(size_sm2_backing_file,
            &(sm_module->coll_sm2_file_name), comm,
            &(sm_module->shared_memory_region));
    if( MPI_SUCCESS != ret ) {
        goto CLEANUP;
    }

    /* initialize barrier structures */
    ret=init_sm2_barrier(comm, &mca_coll_sm2_component,
            sm_module);
    if( MPI_SUCCESS != ret ) {
        goto CLEANUP;
    }

    /* initialize reduction tree */
    sm_module->reduction_tree=(tree_node_t *) malloc(
            sizeof(tree_node_t )*group_size);
    if( NULL == sm_module->reduction_tree ) {
        goto CLEANUP;
    }

    ret=setup_multinomial_tree(mca_coll_sm2_component.order_reduction_tree,
            group_size,sm_module->reduction_tree);
    if( MPI_SUCCESS != ret ) {
        goto CLEANUP;
    }

    /* initialize fan-out read tree */
    sm_module->fanout_read_tree=(tree_node_t *) malloc(
            sizeof(tree_node_t )*group_size);
    if( NULL == sm_module->fanout_read_tree ) {
        goto CLEANUP;
    }

    ret=setup_multinomial_tree(mca_coll_sm2_component.order_fanout_read_tree,
            group_size,sm_module->fanout_read_tree);
    if( MPI_SUCCESS != ret ) {
        goto CLEANUP;
    }

    /* initialize recursive doubling tree */
    ret=setup_recursive_doubling_tree_node(group_size, ompi_comm_rank(comm),
            &(sm_module->recursive_doubling_tree));
    if( MPI_SUCCESS != ret ) {
        goto CLEANUP;
    }

    /* initialize local counters */
    sm_module->sm2_allocated_buffer_index=-1;
    sm_module->sm2_freed_buffer_index=-1;

    /* setup shared memory buffer descriptors */
    for( i=0 ; i < sm_module->sm2_module_num_buffers ; i++ ) {

        char *base_buffer;
        volatile mca_coll_sm2_nb_request_process_shared_mem_t *ctl_ptr;

        /* set the base address for this working buffer */
        base_buffer= sm_module->collective_buffer_region+
            /* offset past control data structures */
            size_buff_ctl_per_proc*group_size +
            i*sm_module->segment_size;
        sm_module->sm_buffer_descriptor[i].base_segment_address=base_buffer;

        /* allocate array to keep data on each segment in the buffer.
         * One segment per process in the group.
         */
        sm_module->sm_buffer_descriptor[i].proc_memory=
            (sm_memory_region_desc_t *)malloc(sizeof(sm_memory_region_desc_t)*
                    group_size);
        if( NULL == sm_module->sm_buffer_descriptor[i].proc_memory ) {
            goto CLEANUP;
        }

        /* set bank index */
        sm_module->sm_buffer_descriptor[i].bank_index=
            i/sm_module->sm2_module_num_regions_per_bank;
        sm_module->sm_buffer_descriptor[i].index_first_buffer_in_bank=
            sm_module->sm_buffer_descriptor[i].bank_index *
            sm_module->sm2_module_num_regions_per_bank;
        sm_module->sm_buffer_descriptor[i].index_last_buffer_in_bank=
            ((sm_module->sm_buffer_descriptor[i].bank_index+1) *
             sm_module->sm2_module_num_regions_per_bank)-1;

        for(j=0 ; j < group_size ; j++ ) {
            ctl_ptr=(volatile mca_coll_sm2_nb_request_process_shared_mem_t *)
                (base_buffer+j* sm_module->segement_size_per_process);
            sm_module->sm_buffer_descriptor[i].proc_memory[j].control_region=
                (volatile mca_coll_sm2_nb_request_process_shared_mem_t *)
                /* offset to temp space */
                (sm_module->collective_buffer_region+
                 /* offset to the per-proc control region */
                 size_buff_ctl_per_proc*j+
                 /* offset to control structure for the i'th buffer */
                 ctl_memory_per_proc_per_segment*i);
            sm_module->sm_buffer_descriptor[i].proc_memory[j].data_segment=
                (char *)base_buffer+
                /* offset to data segment for the j'th proc */
                j*size_data_buff_per_proc;
            /* initialize the control region */
            sm_module->sm_buffer_descriptor[i].proc_memory[j].control_region->flag=0;
        }

    }

    /* allocate process private scratch space */
    sm_module->scratch_space=(int *)malloc(sizeof(int)*group_size);
    if( NULL == sm_module->scratch_space) {
        goto CLEANUP;
    }

    /*
     * setup blocking barrier data structures
     */
    sm_module->sm_blocking_barrier_region=
        sm_module->shared_memory_region+
        sm_module->size_mem_banks_ctl_region+
        sm_module->size_of_collective_buffer_region;

    sm_module->index_blocking_barrier_memory_bank=0;

    sm_module->ctl_blocking_barrier=
        (volatile mca_coll_sm2_nb_request_process_shared_mem_t ***)
        malloc(2*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t **));
    if( NULL == sm_module->ctl_blocking_barrier ) {
        goto CLEANUP;
    }
    sm_module->ctl_blocking_barrier[0]=
        (mca_coll_sm2_nb_request_process_shared_mem_t **)
        malloc(group_size*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t *));
    if( NULL == sm_module->ctl_blocking_barrier[0]) {
        goto CLEANUP;
    }
    sm_module->ctl_blocking_barrier[1]=
        (mca_coll_sm2_nb_request_process_shared_mem_t **)
        malloc(group_size*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t *));
    if( NULL == sm_module->ctl_blocking_barrier[1]) {
        goto CLEANUP;
    }

    for( j= 0 ; j < 2 ; j++ ) {
        for( i=0 ; i < group_size ; i++ ) {
            sm_module->ctl_blocking_barrier[j][i]=
                (mca_coll_sm2_nb_request_process_shared_mem_t * )
                (
                 sm_module->sm_blocking_barrier_region+
                 j*sizeof(mca_coll_sm2_nb_request_process_shared_mem_t)+
                 i*sm_module->per_proc_size_of_blocking_barrier_region )
                ;
            sm_module->ctl_blocking_barrier[j][i]->flag=0;
        }
    }

    /* set the switch-over parameter */
    sm_module->short_message_size=mca_coll_sm2_component.short_message_size;

    /* touch pages to apply memory affinity - Note: do we really need this or will
     * the algorithms do this */

    /* make sure all procs are done with setup - need to avoid initializing
     * shared memory regions already in use
     */
    ret=barrier(comm,sm_module->reduction_tree);
    if( MPI_SUCCESS != ret ) {
        goto CLEANUP;
    }

    /* return */
    return &(sm_module->super);

CLEANUP:

    if( NULL != sm_module->coll_sm2_file_name ) {
        free(sm_module->coll_sm2_file_name);
        sm_module->coll_sm2_file_name=NULL;
    }

    if( NULL != sm_module->reduction_tree ) {
        free(sm_module->reduction_tree);
        sm_module->reduction_tree=NULL;
    }

    if( NULL != sm_module->sm_buffer_descriptor ) {
        for(i=0 ; i < group_size ; i++ ) {
            if(NULL != sm_module->sm_buffer_descriptor[i].proc_memory) {
                free(sm_module->sm_buffer_descriptor[i].proc_memory);
                sm_module->sm_buffer_descriptor[i].proc_memory=NULL;
            }
        }
        free(sm_module->sm_buffer_descriptor);
        sm_module->sm_buffer_descriptor=NULL;
    }

    if(sm_module->scratch_space) {
        free(sm_module->scratch_space);
        sm_module->scratch_space=NULL;
    }

    if( NULL != sm_module->ctl_blocking_barrier ) {
        /* only the two pointer arrays were allocated with malloc; the
         * per-process entries point into the mmaped backing file */
        if( NULL != sm_module->ctl_blocking_barrier[0] ) {
            free(sm_module->ctl_blocking_barrier[0]);
            sm_module->ctl_blocking_barrier[0]=NULL;
        }
        if( NULL != sm_module->ctl_blocking_barrier[1] ) {
            free(sm_module->ctl_blocking_barrier[1]);
            sm_module->ctl_blocking_barrier[1]=NULL;
        }
        free(sm_module->ctl_blocking_barrier);
        sm_module->ctl_blocking_barrier=NULL;
    }

    OBJ_RELEASE(sm_module);

    return NULL;
}

/*
 * Init module on the communicator
 */
static int
sm2_module_enable(mca_coll_base_module_t *module,
        struct ompi_communicator_t *comm)
{
    /* local variables */
    char output_buffer[2*MPI_MAX_OBJECT_NAME];

    memset(&output_buffer[0],0,sizeof(output_buffer));
    snprintf(output_buffer,sizeof(output_buffer),"%s (cid %d)", comm->c_name,
            comm->c_contextid);
    opal_output_verbose(10, mca_coll_base_output,
            "coll:sm2:enable: new communicator: %s", output_buffer);

    /* All done */
    return OMPI_SUCCESS;
}

/* progress barrier */
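/* Drive the oldest outstanding non-blocking bank barrier forward, if any.
 * Only one barrier is progressed per call; once it reaches NB_BARRIER_DONE
 * the request is marked inactive, the completion count is advanced, and
 * the request flips to its other shared-memory region for the next use.
 */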
static
int progress_nb_barrier(mca_coll_sm2_module_t *module)
{
    int rc,barrier_index;

    if( module->num_nb_barriers_started !=
            module->num_nb_barriers_completed ) {
        /* is there anything to progress ? */
        /* get index of barrier structure to progress.  The one to progress
         * is the one right after the last completed nb barrier.  No need
         * to subtract 1 for the index, as the number completed is the index
         * of the next one to complete.
         */
        barrier_index=(module->num_nb_barriers_completed%
                module->sm2_module_num_memory_banks);

        rc=mca_coll_sm2_nbbarrier_intra_progress(module->module_comm,
                &(module->barrier_request[barrier_index]),
                (mca_coll_base_module_t *)module);
        if( OMPI_SUCCESS != rc ) {
            return rc;
        }

        /* if barrier is completed, transition it to inactive, and point to
         * the request object for the next bank
         */
        if ( NB_BARRIER_DONE ==
                module->barrier_request[barrier_index].sm2_barrier_phase ) {

            /* set request to inactive */
            module->barrier_request[barrier_index].sm2_barrier_phase=
                NB_BARRIER_INACTIVE;
            module->num_nb_barriers_completed++;
            /* change pointer to the shared data structure to use next time */
            module->barrier_request[barrier_index].sm_index^=1;
        }
    }

    return OMPI_SUCCESS;
}

/* allocate working buffer */
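/* Hand out the next shared-memory work buffer in round-robin order.  When
 * the allocation wraps into a new memory bank and every bank still has a
 * recycling barrier in flight, spin on progress_nb_barrier() until one
 * completes, so the bank about to be reused is known to be free.
 */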
sm_work_buffer_t *alloc_sm2_shared_buffer(mca_coll_sm2_module_t *module)
{
    /* local variables */
    int rc,buffer_index;

    /* progress active barrier */
    rc=progress_nb_barrier(module);
    if( OMPI_SUCCESS != rc ) {
        return NULL;
    }

    /* get next buffer index */
    module->sm2_allocated_buffer_index++;

    /* check for wrap-around */
    if( module->sm2_allocated_buffer_index == module->sm2_module_num_buffers ) {
        module->sm2_allocated_buffer_index=0;
    }

    /* If this is the first buffer in the bank, see if the barrier
     * needs to be completed
     */
    buffer_index=module->sm2_allocated_buffer_index;
    if( buffer_index ==
            module->sm_buffer_descriptor[buffer_index].
            index_first_buffer_in_bank ) {
        /* are there incomplete barriers ? */
        int num_incomplete_barriers=module->num_nb_barriers_started -
            module->num_nb_barriers_completed;

        /* only complete the one we want to use.  If there are less than
         * module->sm2_module_num_memory_banks active banks, no need to
         * worry about completion, as completion is ordered.
         */
        while( num_incomplete_barriers == module->sm2_module_num_memory_banks ) {
            rc=progress_nb_barrier(module);
            if( OMPI_SUCCESS != rc ) {
                return NULL;
            }
            num_incomplete_barriers=module->num_nb_barriers_started -
                module->num_nb_barriers_completed;
        }

    } /* end polling, waiting to be able to use the memory bank */

    return &(module->sm_buffer_descriptor[buffer_index]);

}

/* free working buffer - it is assumed that buffers are released in
 * the order they are allocated.  We can assume this because each
 * communicator will have only one outstanding collective at a given
 * time, and we ensure that operations are completed in order. */
int free_sm2_shared_buffer(mca_coll_sm2_module_t *module)
{
    /* local variables */
    int rc,buffer_index;
    mca_coll_sm2_nb_request_process_private_mem_t *request;

    /* progress active barrier */
    rc=progress_nb_barrier(module);
    if( OMPI_SUCCESS != rc ) {
        return rc;
    }

    /* get next buffer index */
    module->sm2_freed_buffer_index++;
    /* check for wrap-around */
    if( module->sm2_freed_buffer_index == module->sm2_module_num_buffers ) {
        module->sm2_freed_buffer_index=0;
    }

    buffer_index=module->sm2_freed_buffer_index;
    if( buffer_index ==
            module->sm_buffer_descriptor[buffer_index].
            index_last_buffer_in_bank ) {
        int barrier_index=module->
            sm_buffer_descriptor[buffer_index].bank_index;

        /* start non-blocking barrier */
        request=&(module->barrier_request[barrier_index]);
        rc=mca_coll_sm2_nbbarrier_intra(module->module_comm,
                request,(mca_coll_base_module_t *)module);
        if( OMPI_SUCCESS !=rc ) {
            return rc;
        }
        module->num_nb_barriers_started++;
        /* mca_coll_sm2_nbbarrier_intra never completes the barrier,
         * so there is no need to check here.  This is needed for ordered
         * completion.
         */
    }

    /* return */
    return OMPI_SUCCESS;
}

OBJ_CLASS_INSTANCE(mca_coll_sm2_module_t,
        mca_coll_base_module_t,
        mca_coll_sm2_module_construct,
        mca_coll_sm2_module_destruct);