coll/ml: cleanup tabs, indentation, and trailing whitespace in
bcol_basesmuma_bcast.c This commit was SVN r31192.
Этот коммит содержится в:
родитель
d241f95af1
Коммит
128cfe0a39
@ -37,19 +37,19 @@
|
|||||||
/* end debug */
|
/* end debug */
|
||||||
int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super)
|
int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super)
|
||||||
{
|
{
|
||||||
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
|
mca_bcol_base_coll_fn_comm_attributes_t comm_attribs;
|
||||||
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
|
mca_bcol_base_coll_fn_invoke_attributes_t inv_attribs;
|
||||||
|
|
||||||
comm_attribs.bcoll_type = BCOL_BCAST;
|
comm_attribs.bcoll_type = BCOL_BCAST;
|
||||||
comm_attribs.comm_size_min = 0;
|
comm_attribs.comm_size_min = 0;
|
||||||
comm_attribs.comm_size_max = 1048576;
|
comm_attribs.comm_size_max = 1048576;
|
||||||
comm_attribs.data_src = DATA_SRC_KNOWN;
|
comm_attribs.data_src = DATA_SRC_KNOWN;
|
||||||
comm_attribs.waiting_semantics = NON_BLOCKING;
|
comm_attribs.waiting_semantics = NON_BLOCKING;
|
||||||
|
|
||||||
inv_attribs.bcol_msg_min = 0;
|
inv_attribs.bcol_msg_min = 0;
|
||||||
inv_attribs.bcol_msg_max = 20000; /* range 1 */
|
inv_attribs.bcol_msg_max = 20000; /* range 1 */
|
||||||
inv_attribs.datatype_bitmap = 0xffffffff;
|
inv_attribs.datatype_bitmap = 0xffffffff;
|
||||||
inv_attribs.op_types_bitmap = 0xffffffff;
|
inv_attribs.op_types_bitmap = 0xffffffff;
|
||||||
|
|
||||||
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
||||||
bcol_basesmuma_bcast_k_nomial_knownroot,
|
bcol_basesmuma_bcast_k_nomial_knownroot,
|
||||||
@ -67,49 +67,47 @@ int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super)
|
|||||||
inv_attribs.bcol_msg_max = 20000; /* range 1 */
|
inv_attribs.bcol_msg_max = 20000; /* range 1 */
|
||||||
|
|
||||||
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
||||||
bcol_basesmuma_bcast_k_nomial_anyroot,
|
bcol_basesmuma_bcast_k_nomial_anyroot,
|
||||||
bcol_basesmuma_bcast_k_nomial_anyroot);
|
bcol_basesmuma_bcast_k_nomial_anyroot);
|
||||||
|
|
||||||
comm_attribs.data_src = DATA_SRC_UNKNOWN;
|
comm_attribs.data_src = DATA_SRC_UNKNOWN;
|
||||||
inv_attribs.bcol_msg_min = 10000000;
|
inv_attribs.bcol_msg_min = 10000000;
|
||||||
inv_attribs.bcol_msg_max = 10485760; /* range 4 */
|
inv_attribs.bcol_msg_max = 10485760; /* range 4 */
|
||||||
|
|
||||||
#ifdef __PORTALS_AVAIL__
|
#ifdef __PORTALS_AVAIL__
|
||||||
|
|
||||||
comm_attribs.waiting_semantics = BLOCKING;
|
comm_attribs.waiting_semantics = BLOCKING;
|
||||||
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
||||||
bcol_basesmuma_lmsg_scatter_allgather_portals_bcast,
|
bcol_basesmuma_lmsg_scatter_allgather_portals_bcast,
|
||||||
bcol_basesmuma_lmsg_scatter_allgather_portals_bcast);
|
bcol_basesmuma_lmsg_scatter_allgather_portals_bcast);
|
||||||
|
|
||||||
|
|
||||||
comm_attribs.waiting_semantics = NON_BLOCKING;
|
|
||||||
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
|
||||||
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast,
|
|
||||||
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast);
|
|
||||||
|
|
||||||
comm_attribs.data_src = DATA_SRC_KNOWN;
|
comm_attribs.waiting_semantics = NON_BLOCKING;
|
||||||
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
||||||
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast,
|
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast,
|
||||||
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast);
|
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_bcast);
|
||||||
|
|
||||||
|
comm_attribs.data_src = DATA_SRC_KNOWN;
|
||||||
|
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
||||||
|
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast,
|
||||||
|
bcol_basesmuma_lmsg_scatter_allgather_portals_nb_knownroot_bcast);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
if (super->use_hdl) {
|
if (super->use_hdl) {
|
||||||
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
||||||
bcol_basesmuma_hdl_zerocopy_bcast,
|
bcol_basesmuma_hdl_zerocopy_bcast,
|
||||||
bcol_basesmuma_hdl_zerocopy_bcast);
|
bcol_basesmuma_hdl_zerocopy_bcast);
|
||||||
} else { */
|
} else { */
|
||||||
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs,
|
mca_bcol_base_set_attributes(super, &comm_attribs, &inv_attribs, NULL, NULL);
|
||||||
NULL,
|
/*
|
||||||
NULL);
|
bcol_basesmuma_binary_scatter_allgather_segment,
|
||||||
/*
|
bcol_basesmuma_binary_scatter_allgather_segment);
|
||||||
bcol_basesmuma_binary_scatter_allgather_segment,
|
*/
|
||||||
bcol_basesmuma_binary_scatter_allgather_segment);
|
/* } */
|
||||||
*/
|
|
||||||
/* } */
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* includes shared memory optimization */
|
/* includes shared memory optimization */
|
||||||
@ -118,12 +116,12 @@ int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super)
|
|||||||
* Shared memory blocking Broadcast - fanin, for small data buffers.
|
* Shared memory blocking Broadcast - fanin, for small data buffers.
|
||||||
* This routine assumes that buf (the input buffer) is a single writer
|
* This routine assumes that buf (the input buffer) is a single writer
|
||||||
* multi reader (SWMR) shared memory buffer owned by the calling rank
|
* multi reader (SWMR) shared memory buffer owned by the calling rank
|
||||||
* which is the only rank that can write to this buffers.
|
* which is the only rank that can write to this buffers.
|
||||||
* It is also assumed that the buffers are registered and fragmented
|
* It is also assumed that the buffers are registered and fragmented
|
||||||
* at the ML level and that buf is sufficiently large to hold the data.
|
* at the ML level and that buf is sufficiently large to hold the data.
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* @param buf - SWMR shared buffer within a sbgp that the
|
* @param buf - SWMR shared buffer within a sbgp that the
|
||||||
* executing rank can write to.
|
* executing rank can write to.
|
||||||
* @param count - the number of elements in the shared buffer.
|
* @param count - the number of elements in the shared buffer.
|
||||||
* @param dtype - the datatype of a shared buffer element.
|
* @param dtype - the datatype of a shared buffer element.
|
||||||
@ -131,15 +129,15 @@ int bcol_basesmuma_bcast_init(mca_bcol_base_module_t *super)
|
|||||||
* @param module - basesmuma module.
|
* @param module - basesmuma module.
|
||||||
*/
|
*/
|
||||||
int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
|
int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
|
||||||
mca_bcol_base_function_t *c_input_args)
|
mca_bcol_base_function_t *c_input_args)
|
||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
int group_size, process_shift, my_node_index;
|
int group_size, process_shift, my_node_index;
|
||||||
int my_rank;
|
int my_rank;
|
||||||
int rc = OMPI_SUCCESS;
|
int rc = OMPI_SUCCESS;
|
||||||
int my_fanout_parent;
|
int my_fanout_parent;
|
||||||
int leading_dim, buff_idx, idx;
|
int leading_dim, buff_idx, idx;
|
||||||
volatile int8_t ready_flag;
|
volatile int8_t ready_flag;
|
||||||
int count=input_args->count;
|
int count=input_args->count;
|
||||||
struct ompi_datatype_t* dtype=input_args->dtype;
|
struct ompi_datatype_t* dtype=input_args->dtype;
|
||||||
int root=input_args->root;
|
int root=input_args->root;
|
||||||
@ -156,9 +154,9 @@ int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
|
|||||||
|
|
||||||
void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr );
|
void *data_addr = (void *)((unsigned char *)input_args->src_desc->data_addr );
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
fprintf(stderr,"Entering sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset);
|
fprintf(stderr,"Entering sm broadcast input_args->sbuf_offset %d \n",input_args->sbuf_offset);
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -168,7 +166,7 @@ int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
|
|||||||
|
|
||||||
buff_idx = input_args->src_desc->buffer_index;
|
buff_idx = input_args->src_desc->buffer_index;
|
||||||
|
|
||||||
/* Get addressing information */
|
/* Get addressing information */
|
||||||
my_rank = bcol_module->super.sbgp_partner_module->my_index;
|
my_rank = bcol_module->super.sbgp_partner_module->my_index;
|
||||||
group_size = bcol_module->colls_no_user_data.size_of_group;
|
group_size = bcol_module->colls_no_user_data.size_of_group;
|
||||||
leading_dim=bcol_module->colls_no_user_data.size_of_group;
|
leading_dim=bcol_module->colls_no_user_data.size_of_group;
|
||||||
@ -184,20 +182,20 @@ int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* get my node for the bcast tree */
|
/* get my node for the bcast tree */
|
||||||
my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]);
|
my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]);
|
||||||
my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift;
|
my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift;
|
||||||
if(group_size <= my_fanout_parent){
|
if(group_size <= my_fanout_parent){
|
||||||
my_fanout_parent -= group_size;
|
my_fanout_parent -= group_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set pointer to current proc ctrl region */
|
/* Set pointer to current proc ctrl region */
|
||||||
/*my_ctl_pointer = ctl_structs[my_rank]; */
|
/*my_ctl_pointer = ctl_structs[my_rank]; */
|
||||||
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
|
my_ctl_pointer = data_buffs[my_rank].ctl_struct;
|
||||||
|
|
||||||
/* setup resource recycling */
|
/* setup resource recycling */
|
||||||
|
|
||||||
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
|
BASESMUMA_HEADER_INIT(my_ctl_pointer, ready_flag, sequence_number, bcol_id);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Fan out from root
|
* Fan out from root
|
||||||
*/
|
*/
|
||||||
@ -214,23 +212,23 @@ int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
|
|||||||
* Get the parent's control buffer.
|
* Get the parent's control buffer.
|
||||||
*/
|
*/
|
||||||
parent_data_pointer = data_buffs[my_fanout_parent].payload;
|
parent_data_pointer = data_buffs[my_fanout_parent].payload;
|
||||||
parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct;
|
parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct;
|
||||||
|
|
||||||
/* Wait until parent signals that data is ready */
|
/* Wait until parent signals that data is ready */
|
||||||
/* The order of conditions checked in this loop is important, as it can
|
/* The order of conditions checked in this loop is important, as it can
|
||||||
* result in a race condition.
|
* result in a race condition.
|
||||||
*/
|
*/
|
||||||
while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){
|
while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){
|
||||||
opal_progress();
|
opal_progress();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy the rank to a shared buffer writable by the current rank */
|
/* Copy the rank to a shared buffer writable by the current rank */
|
||||||
memcpy(data_addr, (void *)parent_data_pointer, pack_len);
|
memcpy(data_addr, (void *)parent_data_pointer, pack_len);
|
||||||
|
|
||||||
if( 0 != rc ) {
|
if( 0 != rc ) {
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
}else{
|
}else{
|
||||||
input_args->result_in_rbuf = false;
|
input_args->result_in_rbuf = false;
|
||||||
/* Interior node */
|
/* Interior node */
|
||||||
@ -238,19 +236,19 @@ int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
|
|||||||
/* Get parent payload data and control data */
|
/* Get parent payload data and control data */
|
||||||
parent_data_pointer = data_buffs[my_fanout_parent].payload;
|
parent_data_pointer = data_buffs[my_fanout_parent].payload;
|
||||||
parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct;
|
parent_ctl_pointer = data_buffs[my_fanout_parent].ctl_struct;
|
||||||
|
|
||||||
|
|
||||||
/* Wait until parent signals that data is ready */
|
/* Wait until parent signals that data is ready */
|
||||||
/* The order of conditions checked in this loop is important, as it can
|
/* The order of conditions checked in this loop is important, as it can
|
||||||
* result in a race condition.
|
* result in a race condition.
|
||||||
*/
|
*/
|
||||||
while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){
|
while (!IS_PEER_READY(parent_ctl_pointer, ready_flag, sequence_number, BCAST_FLAG, bcol_id)){
|
||||||
opal_progress();
|
opal_progress();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy the rank to a shared buffer writable by the current rank */
|
/* Copy the rank to a shared buffer writable by the current rank */
|
||||||
memcpy(data_addr, (void *)parent_data_pointer,pack_len);
|
memcpy(data_addr, (void *)parent_data_pointer,pack_len);
|
||||||
|
|
||||||
/* Signal to children that they may read the data from my shared buffer */
|
/* Signal to children that they may read the data from my shared buffer */
|
||||||
opal_atomic_wmb ();
|
opal_atomic_wmb ();
|
||||||
my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag;
|
my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag;
|
||||||
@ -268,15 +266,15 @@ int bcol_basesmuma_bcast(bcol_function_args_t *input_args,
|
|||||||
/*zero-copy large massage communication methods*/
|
/*zero-copy large massage communication methods*/
|
||||||
#if 0
|
#if 0
|
||||||
int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
||||||
mca_bcol_base_function_t *c_input_args)
|
mca_bcol_base_function_t *c_input_args)
|
||||||
{
|
{
|
||||||
/* local variables */
|
/* local variables */
|
||||||
int group_size, process_shift, my_node_index;
|
int group_size, process_shift, my_node_index;
|
||||||
int my_rank, first_instance=0, flag_offset;
|
int my_rank, first_instance=0, flag_offset;
|
||||||
int rc = OMPI_SUCCESS;
|
int rc = OMPI_SUCCESS;
|
||||||
int my_fanout_parent;
|
int my_fanout_parent;
|
||||||
int leading_dim, buff_idx, idx;
|
int leading_dim, buff_idx, idx;
|
||||||
volatile int64_t ready_flag;
|
volatile int64_t ready_flag;
|
||||||
int count=input_args->count;
|
int count=input_args->count;
|
||||||
struct ompi_datatype_t* dtype=input_args->dtype;
|
struct ompi_datatype_t* dtype=input_args->dtype;
|
||||||
int root=input_args->root;
|
int root=input_args->root;
|
||||||
@ -291,7 +289,7 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
|
|
||||||
struct mca_hdl_base_descriptor_t *hdl_desc;
|
struct mca_hdl_base_descriptor_t *hdl_desc;
|
||||||
struct mca_hdl_base_segment_t *hdl_seg;
|
struct mca_hdl_base_segment_t *hdl_seg;
|
||||||
int ret, completed, ridx/*remote rank index*/;
|
int ret, completed, ridx/*remote rank index*/;
|
||||||
bool status;
|
bool status;
|
||||||
volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
|
volatile mca_bcol_basesmuma_ctl_struct_t **ctl_structs;
|
||||||
mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer= NULL;
|
mca_bcol_basesmuma_ctl_struct_t *my_ctl_pointer= NULL;
|
||||||
@ -306,7 +304,7 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
|
|
||||||
buff_idx = input_args->src_desc->buffer_index;
|
buff_idx = input_args->src_desc->buffer_index;
|
||||||
|
|
||||||
/* Get addressing information */
|
/* Get addressing information */
|
||||||
my_rank = bcol_module->super.sbgp_partner_module->my_index;
|
my_rank = bcol_module->super.sbgp_partner_module->my_index;
|
||||||
group_size = bcol_module->colls_no_user_data.size_of_group;
|
group_size = bcol_module->colls_no_user_data.size_of_group;
|
||||||
leading_dim=bcol_module->colls_no_user_data.size_of_group;
|
leading_dim=bcol_module->colls_no_user_data.size_of_group;
|
||||||
@ -323,18 +321,18 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* get my node for the bcast tree */
|
/* get my node for the bcast tree */
|
||||||
my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]);
|
my_fanout_read_tree = &(bcol_module->fanout_read_tree[my_node_index]);
|
||||||
my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift;
|
my_fanout_parent = my_fanout_read_tree->parent_rank + process_shift;
|
||||||
if(group_size <= my_fanout_parent){
|
if(group_size <= my_fanout_parent){
|
||||||
my_fanout_parent -= group_size;
|
my_fanout_parent -= group_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup resource recycling */
|
/* setup resource recycling */
|
||||||
if( my_ctl_pointer->sequence_number < sequence_number ) {
|
if( my_ctl_pointer->sequence_number < sequence_number ) {
|
||||||
first_instance = 1;
|
first_instance = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( first_instance ) {
|
if( first_instance ) {
|
||||||
/* Signal arrival */
|
/* Signal arrival */
|
||||||
my_ctl_pointer->flag = -1;
|
my_ctl_pointer->flag = -1;
|
||||||
my_ctl_pointer->index = 1;
|
my_ctl_pointer->index = 1;
|
||||||
@ -354,19 +352,19 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
ready_flag = flag_offset + sequence_number + 1;
|
ready_flag = flag_offset + sequence_number + 1;
|
||||||
my_ctl_pointer->sequence_number = sequence_number;
|
my_ctl_pointer->sequence_number = sequence_number;
|
||||||
|
|
||||||
hdl_desc = (mca_hdl_base_descriptor_t *)
|
hdl_desc = (mca_hdl_base_descriptor_t *)
|
||||||
malloc (sizeof (mca_hdl_base_descriptor_t) * 1);
|
malloc (sizeof (mca_hdl_base_descriptor_t) * 1);
|
||||||
|
|
||||||
/*prepare a hdl data segment*/
|
/*prepare a hdl data segment*/
|
||||||
hdl_seg = (mca_hdl_base_segment_t*)
|
hdl_seg = (mca_hdl_base_segment_t*)
|
||||||
malloc ( sizeof (mca_hdl_base_segment_t) * 1);
|
malloc ( sizeof (mca_hdl_base_segment_t) * 1);
|
||||||
hdl_seg->seg_addr.pval = input_args->sbuf;
|
hdl_seg->seg_addr.pval = input_args->sbuf;
|
||||||
hdl_seg->seg_len = pack_len;
|
hdl_seg->seg_len = pack_len;
|
||||||
|
|
||||||
|
|
||||||
hdl->endpoint->ready_flag = ready_flag;
|
hdl->endpoint->ready_flag = ready_flag;
|
||||||
hdl->endpoint->local_ctrl = my_ctl_pointer;
|
hdl->endpoint->local_ctrl = my_ctl_pointer;
|
||||||
hdl->endpoint->sbgp_contextid =
|
hdl->endpoint->sbgp_contextid =
|
||||||
bcol_module->super.sbgp_partner_module->group_comm->c_contextid;
|
bcol_module->super.sbgp_partner_module->group_comm->c_contextid;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -379,11 +377,11 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
hdl_desc->des_src_cnt = 1;
|
hdl_desc->des_src_cnt = 1;
|
||||||
hdl_desc->isroot = true;
|
hdl_desc->isroot = true;
|
||||||
|
|
||||||
/*As the general semantics, there might multiple pairs of send/recv
|
/*As the general semantics, there might multiple pairs of send/recv
|
||||||
*on the topology tree*/
|
*on the topology tree*/
|
||||||
for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) {
|
for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) {
|
||||||
child_ctl_pointer =
|
child_ctl_pointer =
|
||||||
ctl_structs[my_fanout_read_tree->children_ranks[ridx]];
|
ctl_structs[my_fanout_read_tree->children_ranks[ridx]];
|
||||||
hdl->endpoint->remote_ctrl = child_ctl_pointer;
|
hdl->endpoint->remote_ctrl = child_ctl_pointer;
|
||||||
ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc);
|
ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc);
|
||||||
if (ret != OMPI_SUCCESS) {
|
if (ret != OMPI_SUCCESS) {
|
||||||
@ -398,10 +396,10 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
* Get the pointer to the base address of the parent's payload buffer.
|
* Get the pointer to the base address of the parent's payload buffer.
|
||||||
* Get the parent's control buffer.
|
* Get the parent's control buffer.
|
||||||
*/
|
*/
|
||||||
parent_ctl_pointer = ctl_structs[my_fanout_parent];
|
parent_ctl_pointer = ctl_structs[my_fanout_parent];
|
||||||
|
|
||||||
hdl_desc->des_dst = hdl_seg;
|
hdl_desc->des_dst = hdl_seg;
|
||||||
hdl_desc->des_dst_cnt = 1;
|
hdl_desc->des_dst_cnt = 1;
|
||||||
hdl_desc->isroot = false;
|
hdl_desc->isroot = false;
|
||||||
hdl->endpoint->remote_ctrl = parent_ctl_pointer;
|
hdl->endpoint->remote_ctrl = parent_ctl_pointer;
|
||||||
|
|
||||||
@ -412,10 +410,10 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __TEST_WAIT__
|
#if __TEST_WAIT__
|
||||||
ret = hdl->hdl_wait(hdl, hdl->endpoint, hdl_desc);
|
ret = hdl->hdl_wait(hdl, hdl->endpoint, hdl_desc);
|
||||||
BASESMUMA_VERBOSE(1,("wait on rank %d is done!", my_rank));
|
BASESMUMA_VERBOSE(1,("wait on rank %d is done!", my_rank));
|
||||||
#endif
|
#endif
|
||||||
if (OMPI_SUCCESS != ret) {
|
if (OMPI_SUCCESS != ret) {
|
||||||
BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank));
|
BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank));
|
||||||
goto exit_ERROR;
|
goto exit_ERROR;
|
||||||
}
|
}
|
||||||
@ -424,42 +422,42 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
#if __TEST_TEST__
|
#if __TEST_TEST__
|
||||||
while (!status) {
|
while (!status) {
|
||||||
hdl->hdl_test(&hdl_desc, &completed, &status);
|
hdl->hdl_test(&hdl_desc, &completed, &status);
|
||||||
opal_progress();
|
opal_progress();
|
||||||
BASESMUMA_VERBOSE(1, ("test on rank %d ........", my_rank));
|
BASESMUMA_VERBOSE(1, ("test on rank %d ........", my_rank));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
goto Release;
|
goto Release;
|
||||||
|
|
||||||
}else{
|
}else{
|
||||||
input_args->result_in_rbuf = false;
|
input_args->result_in_rbuf = false;
|
||||||
/* Interior node */
|
/* Interior node */
|
||||||
|
|
||||||
/* Get parent payload data and control data */
|
/* Get parent payload data and control data */
|
||||||
parent_ctl_pointer = ctl_structs[my_fanout_parent];
|
parent_ctl_pointer = ctl_structs[my_fanout_parent];
|
||||||
|
|
||||||
hdl_desc->des_dst = hdl_seg;
|
hdl_desc->des_dst = hdl_seg;
|
||||||
hdl_desc->des_dst_cnt = 1;
|
hdl_desc->des_dst_cnt = 1;
|
||||||
hdl_desc->isroot = false;
|
hdl_desc->isroot = false;
|
||||||
|
|
||||||
hdl->endpoint->remote_ctrl = parent_ctl_pointer;
|
hdl->endpoint->remote_ctrl = parent_ctl_pointer;
|
||||||
|
|
||||||
ret = hdl->hdl_recv(hdl, hdl->endpoint, hdl_desc);
|
ret = hdl->hdl_recv(hdl, hdl->endpoint, hdl_desc);
|
||||||
if (OMPI_SUCCESS != ret) {
|
if (OMPI_SUCCESS != ret) {
|
||||||
goto exit_ERROR;
|
goto exit_ERROR;
|
||||||
}
|
}
|
||||||
if (OMPI_SUCCESS != ret) {
|
if (OMPI_SUCCESS != ret) {
|
||||||
BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank));
|
BASESMUMA_VERBOSE(1, ("recvi eror on rank %d ........", my_rank));
|
||||||
goto exit_ERROR;
|
goto exit_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Signal to children that they may read the data from my shared buffer */
|
/* Signal to children that they may read the data from my shared buffer */
|
||||||
opal_atomic_wmb ();
|
opal_atomic_wmb ();
|
||||||
hdl_desc->des_src = hdl_seg;
|
hdl_desc->des_src = hdl_seg;
|
||||||
hdl_desc->des_src_cnt = 1;
|
hdl_desc->des_src_cnt = 1;
|
||||||
for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) {
|
for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) {
|
||||||
child_ctl_pointer =
|
child_ctl_pointer =
|
||||||
ctl_structs[my_fanout_read_tree->children_ranks[ridx]];
|
ctl_structs[my_fanout_read_tree->children_ranks[ridx]];
|
||||||
hdl->endpoint->remote_ctrl = child_ctl_pointer;
|
hdl->endpoint->remote_ctrl = child_ctl_pointer;
|
||||||
|
|
||||||
ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc);
|
ret = hdl->hdl_send(hdl, hdl->endpoint, hdl_desc);
|
||||||
@ -471,19 +469,19 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args,
|
|||||||
goto Release;
|
goto Release;
|
||||||
}
|
}
|
||||||
|
|
||||||
Release:
|
Release:
|
||||||
/* if I am the last instance of a basesmuma function in this collectie,
|
/* if I am the last instance of a basesmuma function in this collectie,
|
||||||
* release the resrouces */
|
* release the resrouces */
|
||||||
if (IS_LAST_BCOL_FUNC(c_input_args)) {
|
if (IS_LAST_BCOL_FUNC(c_input_args)) {
|
||||||
rc = bcol_basesmuma_free_buff(
|
rc = bcol_basesmuma_free_buff(
|
||||||
&(bcol_module->colls_with_user_data),
|
&(bcol_module->colls_with_user_data),
|
||||||
sequence_number);
|
sequence_number);
|
||||||
}
|
}
|
||||||
|
|
||||||
my_ctl_pointer->starting_flag_value += 1;
|
my_ctl_pointer->starting_flag_value += 1;
|
||||||
|
|
||||||
return BCOL_FN_COMPLETE;
|
return BCOL_FN_COMPLETE;
|
||||||
exit_ERROR:
|
exit_ERROR:
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user