2012-08-16 23:11:35 +04:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
|
|
|
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
2013-01-28 03:25:10 +04:00
|
|
|
* Copyright (c) 2012 Los Alamos National Security, LLC.
|
|
|
|
* All rights reserved.
|
2012-08-16 23:11:35 +04:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "mpi.h"
|
|
|
|
#include "ompi/constants.h"
|
|
|
|
#include "ompi/communicator/communicator.h"
|
|
|
|
#include "ompi/mca/mpool/base/base.h"
|
|
|
|
#include "ompi/mca/bcol/bcol.h"
|
|
|
|
#include "ompi/mca/bcol/base/base.h"
|
2013-02-06 01:52:55 +04:00
|
|
|
#include "ompi/patterns/comm/coll_ops.h"
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
#include "opal/class/opal_object.h"
|
|
|
|
#include "opal/dss/dss.h"
|
|
|
|
|
|
|
|
#include "bcol_basesmuma.h"
|
|
|
|
|
|
|
|
int base_bcol_basesmuma_setup_ctl_struct(
|
|
|
|
mca_bcol_basesmuma_module_t *sm_bcol_module,
|
|
|
|
mca_bcol_basesmuma_component_t *cs,
|
|
|
|
sm_buffer_mgmt *ctl_mgmt);
|
|
|
|
|
|
|
|
/* this is the new one, uses the pml allgather */
|
|
|
|
int base_bcol_basesmuma_exchange_offsets(
|
|
|
|
mca_bcol_basesmuma_module_t *sm_bcol_module,
|
|
|
|
void **result_array, uint64_t mem_offset, int loop_limit,
|
|
|
|
int leading_dim)
|
|
|
|
{
|
|
|
|
int ret=OMPI_SUCCESS,i;
|
|
|
|
int count;
|
|
|
|
int index_in_group;
|
|
|
|
char *send_buff;
|
|
|
|
char *recv_buff;
|
|
|
|
uint64_t rem_mem_offset;
|
|
|
|
|
|
|
|
/* malloc some memory */
|
|
|
|
count = sizeof(uint64_t) + sizeof(int);
|
|
|
|
send_buff = (char *) malloc(count);
|
|
|
|
recv_buff = (char *) malloc(count *
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_size);
|
|
|
|
/* exchange the base pointer for the controls structures - gather
|
|
|
|
* every one else's infromation.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/* pack the offset of the allocated region */
|
|
|
|
memcpy((void *) send_buff, (void *) &(sm_bcol_module->super.sbgp_partner_module->my_index), sizeof(int));
|
|
|
|
memcpy((void *) (send_buff+ sizeof(int)), (void *) &(mem_offset), sizeof(uint64_t));
|
|
|
|
|
|
|
|
/* get the offsets from all procs, so can setup the control data
|
|
|
|
* structures.
|
|
|
|
*/
|
|
|
|
|
|
|
|
ret=comm_allgather_pml((void *) send_buff,(void *) recv_buff,count,
|
|
|
|
MPI_BYTE,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->my_index,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_size,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_list,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_comm);
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* get the control stucture offsets within the shared memory
|
|
|
|
* region and populate the control structures - we do not assume
|
|
|
|
* any symmetry in memory layout of each process
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* loop over the procs in the group */
|
|
|
|
for(i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++){
|
|
|
|
int array_id;
|
|
|
|
/* get this peer's index in the group */
|
|
|
|
memcpy((void *) &index_in_group, (void *) (recv_buff + i*count) , sizeof(int));
|
|
|
|
|
|
|
|
/* get the offset */
|
|
|
|
memcpy((void *) &rem_mem_offset, (void *) (recv_buff + i*count + sizeof(int)), sizeof(uint64_t));
|
|
|
|
|
|
|
|
array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group);
|
2013-04-03 21:37:02 +04:00
|
|
|
result_array[array_id]=(void *)(uintptr_t)rem_mem_offset;
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
ERROR:
|
|
|
|
/* clean up */
|
|
|
|
if( NULL != send_buff ) {
|
|
|
|
free(send_buff);
|
|
|
|
send_buff = NULL;
|
|
|
|
}
|
|
|
|
if( NULL != recv_buff ) {
|
|
|
|
free(recv_buff);
|
|
|
|
recv_buff = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#if 0
/*
 * Disabled legacy implementation of base_bcol_basesmuma_exchange_offsets();
 * it is excluded from compilation and superseded by the
 * comm_allgather_pml()-based version defined earlier in this file.
 *
 * It exchanges the same (index in group, memory offset) pair per rank, but
 * through ompi_rte_allgather_list() with opal_dss pack/unpack, and stores the
 * received offsets in result_array[SM_ARRAY_INDEX(leading_dim, 0, index)].
 *
 * Returns OMPI_SUCCESS or the first error encountered.  All resources are
 * released through the single exit path at the ERROR label.
 */
int base_bcol_basesmuma_exchange_offsets(
    mca_bcol_basesmuma_module_t *sm_bcol_module,
    void **result_array, uint64_t mem_offset, int loop_limit,
    int leading_dim)
{
    int ret = OMPI_SUCCESS, i, dummy;
    int index_in_group, pcnt;
    opal_list_t peers;
    ompi_namelist_t *peer;
    ompi_proc_t *proc_temp;
    opal_buffer_t *send_buffer = OBJ_NEW(opal_buffer_t);
    opal_buffer_t *recv_buffer = OBJ_NEW(opal_buffer_t);
    uint64_t rem_mem_offset;

    /* constructed first so the common cleanup can always destruct it */
    OBJ_CONSTRUCT(&peers, opal_list_t);

    if (NULL == send_buffer || NULL == recv_buffer) {
        fprintf(stderr,"Cannot allocate memory for sbuffer or rbuffer \n");
        fflush(stderr);
        ret = OMPI_ERROR;
        goto ERROR;
    }

    /* exchange the base pointer for the controls structures - gather
     * every one else's information.
     */
    /* get list of procs that will participate in the communication */
    for (i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++) {
        /* get the proc info */
        proc_temp = ompi_comm_peer_lookup(
            sm_bcol_module->super.sbgp_partner_module->group_comm,
            sm_bcol_module->super.sbgp_partner_module->group_list[i]);
        peer = OBJ_NEW(ompi_namelist_t);
        if (NULL == peer) {
            ret = OMPI_ERR_OUT_OF_RESOURCE;
            goto ERROR;
        }
        peer->name.jobid = proc_temp->proc_name.jobid;
        peer->name.vpid = proc_temp->proc_name.vpid;
        opal_list_append(&peers, &peer->super); /* this is with the new field called "super" in ompi_namelist_t struct */
    }

    /* pack my information */
    ret = opal_dss.pack(send_buffer,
        &(sm_bcol_module->super.sbgp_partner_module->my_index),1,OPAL_UINT32);
    if (OMPI_SUCCESS != ret) {
        /* report before jumping; the message used to sit after the goto
         * and could never be printed */
        fprintf(stderr,"ORTE error packing my_index!!\n");
        fflush(stderr);
        goto ERROR;
    }

    /* pack the offset of the allocated region */
    ret = opal_dss.pack(send_buffer,&(mem_offset),1,OPAL_UINT64);
    if (OMPI_SUCCESS != ret) {
        goto ERROR;
    }

    /* get the offsets from all procs, so can setup the control data
     * structures.
     */
    if (OMPI_SUCCESS != (ret = ompi_rte_allgather_list(&peers, send_buffer, recv_buffer))) {
        fprintf(stderr,"ompi_rte_allgather_list returned error %d\n", ret);
        fflush(stderr);
        goto ERROR;
    }

    /* unpack the dummy */
    pcnt=1;
    ret = opal_dss.unpack(recv_buffer,&dummy, &pcnt, OPAL_INT32);
    if (OMPI_SUCCESS != ret) {
        fprintf(stderr,"unpack returned error %d for dummy \n",ret);
        fflush(stderr);
        goto ERROR;
    }

    /* get the control structure offsets within the shared memory
     * region and populate the control structures - we do not assume
     * any symmetry in memory layout of each process
     */

    /* loop over the procs in the group */
    for (i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++) {
        int array_id;

        pcnt=1;
        ret = opal_dss.unpack(recv_buffer,&index_in_group, &pcnt, OPAL_UINT32);
        if (OMPI_SUCCESS != ret) {
            fprintf(stderr,"unpack returned error %d for remote index_in_group \n",ret);
            fflush(stderr);
            goto ERROR;
        }

        /* get the offset */
        pcnt=1;
        ret = opal_dss.unpack(recv_buffer,&rem_mem_offset, &pcnt, OPAL_UINT64);
        if (OMPI_SUCCESS != ret) {
            fprintf(stderr,"unpack returned error %d for remote memory offset \n",ret);
            fflush(stderr);
            goto ERROR;
        }

        array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group);
        /* go through uintptr_t, as the live implementation does, so the
         * integer-to-pointer conversion is well defined on every ABI */
        result_array[array_id]=(void *)(uintptr_t)rem_mem_offset;
    }

ERROR:
    /* clean up - single exit path for success and failure */

    /* free peer list */
    peer=(ompi_namelist_t *)opal_list_remove_first(&peers);
    while( NULL !=peer) {
        OBJ_RELEASE(peer);
        peer=(ompi_namelist_t *)opal_list_remove_first(&peers);
    }
    OBJ_DESTRUCT(&peers);
    if( send_buffer ) {
        OBJ_RELEASE(send_buffer);
    }
    if( recv_buffer ) {
        OBJ_RELEASE(recv_buffer);
    }

    return ret;
}
#endif
|
|
|
|
|
|
|
|
|
|
|
|
static int base_bcol_basesmuma_exchange_ctl_params(
|
|
|
|
mca_bcol_basesmuma_module_t *sm_bcol_module,
|
|
|
|
mca_bcol_basesmuma_component_t *cs,
|
|
|
|
sm_buffer_mgmt *ctl_mgmt, list_data_t *data_blk)
|
|
|
|
{
|
|
|
|
int ret=OMPI_SUCCESS,i,loop_limit;
|
|
|
|
int leading_dim, buf_id;
|
|
|
|
uint64_t mem_offset;
|
|
|
|
unsigned char *base_ptr;
|
|
|
|
mca_bcol_basesmuma_ctl_struct_t *ctl_ptr;
|
|
|
|
|
|
|
|
/* data block base offset in the mapped file */
|
2013-04-03 21:37:02 +04:00
|
|
|
mem_offset=(uint64_t)(uintptr_t)(data_blk->data)-
|
|
|
|
(uint64_t)(uintptr_t)cs->sm_ctl_structs->data_addr;
|
2012-08-16 23:11:35 +04:00
|
|
|
|
|
|
|
/* number of buffers in data block */
|
|
|
|
loop_limit=cs->basesmuma_num_mem_banks+ctl_mgmt->number_of_buffs;
|
|
|
|
leading_dim=ctl_mgmt->size_of_group;
|
|
|
|
ret=comm_allgather_pml(&mem_offset,ctl_mgmt->ctl_buffs,1,
|
|
|
|
MPI_LONG_LONG_INT,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->my_index,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_size,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_list,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_comm);
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
ret=base_bcol_basesmuma_exchange_offsets( sm_bcol_module,
|
|
|
|
(void **)ctl_mgmt->ctl_buffs, mem_offset, loop_limit, leading_dim);
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* convert memory offset to virtual address in current rank */
|
|
|
|
for (i=0;i< sm_bcol_module->super.sbgp_partner_module->group_size;i++) {
|
|
|
|
|
|
|
|
/* get the base pointer */
|
|
|
|
int array_id=SM_ARRAY_INDEX(leading_dim,0,i);
|
|
|
|
if( i == sm_bcol_module->super.sbgp_partner_module->my_index) {
|
|
|
|
/* me */
|
|
|
|
base_ptr=cs->sm_ctl_structs->map_addr;
|
|
|
|
} else {
|
|
|
|
base_ptr=sm_bcol_module->ctl_backing_files_info[i]->sm_mmap->map_addr;
|
|
|
|
}
|
|
|
|
ctl_mgmt->ctl_buffs[array_id]=(void *)
|
2013-04-03 21:37:02 +04:00
|
|
|
(uintptr_t)(((uint64_t)(uintptr_t)ctl_mgmt->ctl_buffs[array_id])+(uint64_t)(uintptr_t)base_ptr);
|
2012-08-16 23:11:35 +04:00
|
|
|
for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) {
|
|
|
|
int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i);
|
|
|
|
array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i);
|
2013-04-03 21:37:02 +04:00
|
|
|
ctl_mgmt->ctl_buffs[array_id]=(void *) (uintptr_t)((uint64_t)(uintptr_t)(ctl_mgmt->ctl_buffs[array_id_m1])+
|
|
|
|
(uint64_t)(uintptr_t)sizeof(mca_bcol_basesmuma_ctl_struct_t));
|
2012-08-16 23:11:35 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
/* initialize my control structues */
|
|
|
|
for( buf_id = 0 ; buf_id < loop_limit ; buf_id++ ) {
|
|
|
|
|
|
|
|
int my_idx=sm_bcol_module->super.sbgp_partner_module->my_index;
|
|
|
|
int array_id=SM_ARRAY_INDEX(leading_dim,buf_id,my_idx);
|
|
|
|
ctl_ptr = (mca_bcol_basesmuma_ctl_struct_t *)
|
|
|
|
ctl_mgmt->ctl_buffs[array_id];
|
|
|
|
|
|
|
|
/* initialize the data structures - RLG, this is only one data
|
|
|
|
* structure that needs to be initialized, more are missing */
|
|
|
|
ctl_ptr->sequence_number=-1;
|
|
|
|
ctl_ptr->flag=-1;
|
|
|
|
ctl_ptr->index=0;
|
|
|
|
ctl_ptr->src_ptr = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ERROR:
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int base_bcol_basesmuma_setup_ctl_struct(
|
|
|
|
mca_bcol_basesmuma_module_t *sm_bcol_module,
|
|
|
|
mca_bcol_basesmuma_component_t *cs,
|
|
|
|
sm_buffer_mgmt *ctl_mgmt)
|
|
|
|
{
|
|
|
|
int ret=OMPI_SUCCESS,i,n_ctl,n_levels;
|
|
|
|
int n_ctl_structs;
|
|
|
|
int cnt;
|
|
|
|
size_t malloc_size;
|
|
|
|
bcol_basesmuma_smcm_file_t input_file;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* set my no user-data conrol structures
|
|
|
|
*/
|
|
|
|
/* number of banks and regions per bank are already a power of 2 */
|
|
|
|
n_ctl_structs=cs->basesmuma_num_mem_banks*
|
|
|
|
cs->basesmuma_num_regions_per_bank;
|
|
|
|
|
|
|
|
/* initialize the control structure management struct -
|
|
|
|
* for collectives without user data
|
|
|
|
*---------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
ctl_mgmt->number_of_buffs=n_ctl_structs;
|
|
|
|
ctl_mgmt->num_mem_banks=
|
|
|
|
cs->basesmuma_num_mem_banks;
|
|
|
|
|
|
|
|
ctl_mgmt->num_buffs_per_mem_bank=
|
|
|
|
cs->basesmuma_num_regions_per_bank;
|
|
|
|
ctl_mgmt->size_of_group=
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_size;
|
|
|
|
roundup_to_power_radix(2,cs->basesmuma_num_regions_per_bank,&n_levels);
|
|
|
|
ctl_mgmt->log2_num_buffs_per_mem_bank=n_levels;
|
|
|
|
|
|
|
|
roundup_to_power_radix(2,n_ctl_structs,&n_levels);
|
|
|
|
ctl_mgmt->log2_number_of_buffs=n_levels;
|
|
|
|
ctl_mgmt->mask=n_ctl_structs-1;
|
|
|
|
sm_bcol_module->super.n_poll_loops=cs->n_poll_loops;
|
|
|
|
|
|
|
|
malloc_size=
|
|
|
|
(ctl_mgmt->number_of_buffs +
|
|
|
|
ctl_mgmt->num_mem_banks ) *
|
|
|
|
ctl_mgmt->size_of_group *
|
|
|
|
sizeof(void *);
|
|
|
|
ctl_mgmt->ctl_buffs= malloc(malloc_size);
|
|
|
|
if( !ctl_mgmt->ctl_buffs ) {
|
|
|
|
ret=OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* exchange remote addressing information */
|
|
|
|
input_file.file_name=cs->sm_ctl_structs->map_path;
|
|
|
|
input_file.size=cs->sm_ctl_structs->map_size;
|
|
|
|
input_file.size_ctl_structure=0;
|
|
|
|
input_file.data_seg_alignment=CACHE_LINE_SIZE;
|
|
|
|
input_file.mpool_size=cs->sm_ctl_structs->map_size;
|
|
|
|
ret=bcol_basesmuma_smcm_allgather_connection(
|
|
|
|
sm_bcol_module,
|
|
|
|
sm_bcol_module->super.sbgp_partner_module,
|
|
|
|
&(cs->sm_connections_list),
|
|
|
|
&(sm_bcol_module->ctl_backing_files_info),
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_comm,
|
|
|
|
input_file, cs->clt_base_fname,
|
|
|
|
false);
|
|
|
|
|
|
|
|
/* fill in the pointer to other ranks scartch shared memory */
|
|
|
|
sm_bcol_module->shared_memory_scratch_space=
|
|
|
|
malloc(sizeof(void *)*
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->group_size);
|
|
|
|
if( !sm_bcol_module->shared_memory_scratch_space ) {
|
|
|
|
fprintf(stderr,"Cannot allocate memory for shared_memory_scratch_space. \n");
|
|
|
|
fflush(stderr);
|
|
|
|
ret = OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
for(i=0 ; i < sm_bcol_module->super.sbgp_partner_module->group_size ; i++ )
|
|
|
|
{
|
|
|
|
if(i ==
|
|
|
|
sm_bcol_module->super.sbgp_partner_module->my_index) {
|
|
|
|
/* local file data is not cached in thi slist */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
sm_bcol_module->shared_memory_scratch_space[i]=(void *)(
|
|
|
|
(char *)(sm_bcol_module->ctl_backing_files_info[i]->sm_mmap)+
|
|
|
|
cs->scratch_offset_from_base_ctl_file);
|
|
|
|
}
|
|
|
|
i=sm_bcol_module->super.sbgp_partner_module->my_index;
|
|
|
|
sm_bcol_module->shared_memory_scratch_space[i]=(void *)(
|
|
|
|
(char *)(cs->sm_ctl_structs->map_addr)+cs->scratch_offset_from_base_ctl_file);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* setup the no-data buffer managment data
|
|
|
|
*/
|
|
|
|
n_ctl=ctl_mgmt->num_mem_banks;
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt=(mem_bank_management_t *)
|
|
|
|
malloc(sizeof(mem_bank_management_t)*n_ctl);
|
|
|
|
if( !ctl_mgmt->ctl_buffs_mgmt ) {
|
|
|
|
fprintf(stderr,"Cannot allocate memory for ctl_buffs_mgmt. ret = %d \n",ret);
|
|
|
|
fflush(stderr);
|
|
|
|
ret = OMPI_ERROR;
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize each individual element */
|
|
|
|
cnt=cs->basesmuma_num_regions_per_bank*cs->basesmuma_num_mem_banks;
|
|
|
|
for( i=0 ; i < n_ctl ; i++ ) {
|
|
|
|
opal_list_item_t *item;
|
|
|
|
opal_mutex_t *mutex_ptr;
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].bank_gen_counter= 0;
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].available_buffers=
|
|
|
|
ctl_mgmt->num_buffs_per_mem_bank;
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].number_of_buffers=
|
|
|
|
ctl_mgmt->num_buffs_per_mem_bank;
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].n_buffs_freed= 0;
|
|
|
|
mutex_ptr= &(ctl_mgmt->ctl_buffs_mgmt[i].mutex);
|
|
|
|
OBJ_CONSTRUCT(mutex_ptr, opal_mutex_t);
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].index_shared_mem_ctl_structs=i;
|
|
|
|
|
|
|
|
item=(opal_list_item_t *)&(ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc);
|
|
|
|
OBJ_CONSTRUCT(item,opal_list_item_t);
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.sm_module=
|
|
|
|
sm_bcol_module;
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.pool_index= i;
|
|
|
|
/* get the sm_buffer_mgmt pointer for the control structures */
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.coll_buff=ctl_mgmt;
|
|
|
|
ctl_mgmt->ctl_buffs_mgmt[i].nb_barrier_desc.ml_memory_block_descriptor=
|
|
|
|
NULL;
|
|
|
|
|
|
|
|
cnt++;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ERROR:
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* this function initializes the internal scratch buffers and control
|
|
|
|
* structures that will be used by the module. It also intitializes
|
|
|
|
* the payload buffer management structures.
|
|
|
|
*/
|
|
|
|
int base_bcol_basesmuma_setup_library_buffers(
|
|
|
|
mca_bcol_basesmuma_module_t *sm_bcol_module,
|
|
|
|
mca_bcol_basesmuma_component_t *cs)
|
|
|
|
{
|
|
|
|
int ret=OMPI_SUCCESS,i;
|
|
|
|
int n_ctl_structs;
|
|
|
|
size_t ctl_segement_size,total_memory;
|
|
|
|
int max_elements;
|
|
|
|
unsigned char *data_ptr;
|
|
|
|
|
|
|
|
/* */
|
|
|
|
/* setup the control struct memory */
|
|
|
|
if(!cs->sm_ctl_structs) {
|
|
|
|
ret = mca_bcol_basesmuma_allocate_sm_ctl_memory(cs);
|
|
|
|
if(OMPI_SUCCESS != ret) {
|
|
|
|
fprintf(stderr,"In bcol_comm_query mca_bcol_basesmuma_allocate_sm_ctl_memory failed \n");
|
|
|
|
fflush(stderr);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* put the memory onto the free list - we have worried about
|
|
|
|
* alignment in the mpool allocation, and assume that the
|
|
|
|
* ctl structures have the approriate size to mantain alignment
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* figure out segment size */
|
|
|
|
n_ctl_structs=cs->basesmuma_num_mem_banks*
|
|
|
|
cs->basesmuma_num_regions_per_bank;
|
|
|
|
|
|
|
|
/* add memory for the control structure used for recycling the banks */
|
|
|
|
n_ctl_structs+=cs->basesmuma_num_mem_banks;
|
|
|
|
|
|
|
|
ctl_segement_size=n_ctl_structs*
|
|
|
|
sizeof(mca_bcol_basesmuma_ctl_struct_t);
|
|
|
|
|
|
|
|
total_memory=cs->sm_ctl_structs->map_size - (
|
|
|
|
(char *)(cs->sm_ctl_structs->data_addr)-
|
|
|
|
(char *)(cs->sm_ctl_structs->map_addr));
|
|
|
|
total_memory-=cs->my_scratch_shared_memory_size;
|
|
|
|
max_elements=total_memory/ctl_segement_size;
|
|
|
|
|
|
|
|
/* populate the free list */
|
|
|
|
data_ptr=cs->sm_ctl_structs->data_addr;
|
|
|
|
|
|
|
|
for( i=0 ; i < max_elements ; i++ ) {
|
|
|
|
list_data_t *item=OBJ_NEW(list_data_t);
|
|
|
|
if( !item ) {
|
|
|
|
ret=OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
item->data=(void *)data_ptr;
|
|
|
|
opal_list_append(&(cs->ctl_structures),(opal_list_item_t *)item);
|
|
|
|
data_ptr+=ctl_segement_size;
|
|
|
|
}
|
|
|
|
/* set the scratch memory pointer and offset */
|
|
|
|
cs->my_scratch_shared_memory=(char *)data_ptr;
|
|
|
|
cs->scratch_offset_from_base_ctl_file=(size_t)
|
|
|
|
((char *)data_ptr-(char *)cs->sm_ctl_structs->map_addr);
|
|
|
|
|
|
|
|
|
|
|
|
/* At this stage the memory is mapped and ready to use by the local rank.
|
|
|
|
* However, the memory of other processes has not yet been mmaped into the
|
|
|
|
* memory of this process.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
/* intialize no_userdata_ctl */
|
|
|
|
sm_bcol_module->no_userdata_ctl=(list_data_t *)
|
|
|
|
opal_list_remove_last(&(cs->ctl_structures));
|
|
|
|
if( !sm_bcol_module->no_userdata_ctl) {
|
|
|
|
ret=OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
ret=base_bcol_basesmuma_setup_ctl_struct(
|
|
|
|
sm_bcol_module, cs, &(sm_bcol_module->colls_no_user_data));
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* intialize userdata_ctl */
|
|
|
|
sm_bcol_module->userdata_ctl=(list_data_t *)
|
|
|
|
opal_list_remove_last(&(cs->ctl_structures));
|
|
|
|
if( !sm_bcol_module->userdata_ctl) {
|
|
|
|
ret=OMPI_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret=base_bcol_basesmuma_setup_ctl_struct(
|
|
|
|
sm_bcol_module, cs, &(sm_bcol_module->colls_with_user_data));
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* used for blocking recursive doubling barrier */
|
|
|
|
sm_bcol_module->index_blocking_barrier_memory_bank=0;
|
|
|
|
|
|
|
|
/* gather the offsets of the control structs relative to the base
|
|
|
|
* of the shared memory file, and fill in the table with the
|
|
|
|
* address of all the control structues.
|
|
|
|
*/
|
|
|
|
ret= base_bcol_basesmuma_exchange_ctl_params(sm_bcol_module, cs,
|
|
|
|
&(sm_bcol_module->colls_no_user_data),sm_bcol_module->no_userdata_ctl);
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret= base_bcol_basesmuma_exchange_ctl_params(sm_bcol_module, cs,
|
|
|
|
&(sm_bcol_module->colls_with_user_data),sm_bcol_module->userdata_ctl);
|
|
|
|
if( OMPI_SUCCESS != ret ) {
|
|
|
|
goto ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
ERROR:
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
OBJ_CLASS_INSTANCE(list_data_t,
|
|
|
|
opal_list_item_t, NULL, NULL);
|